From 6e81cc49b4097d8273eaef61f9148096683f6936 Mon Sep 17 00:00:00 2001 From: nashim-kayaga <56426944+nashim-kayaga@users.noreply.github.com> Date: Fri, 13 Mar 2020 12:29:55 +0300 Subject: [PATCH 1/4] Add files via upload KAYAGA NASHIM MILVAT NOTEBOOK AND csv --- Assignment Colab/KAYAGA NASHIM MILVAT.ipynb | 1 + Assignment Colab/report_bayes.csv | 759 ++++++++++++++++++++ 2 files changed, 760 insertions(+) create mode 100644 Assignment Colab/KAYAGA NASHIM MILVAT.ipynb create mode 100644 Assignment Colab/report_bayes.csv diff --git a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb new file mode 100644 index 0000000..e6b0219 --- /dev/null +++ b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb @@ -0,0 +1 @@ +{"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n \n\n# Any results you write to the current directory are saved as output.","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# References\n#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2"},{"metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","trusted":true},"cell_type":"code","source":"import warnings\nwarnings.filterwarnings('ignore')","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\ntest","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#read in the data\ndata = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\ndata.head(5)","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Analyze data by describing\n\n#### This step helped me know which features are in my dataset, are they categorical or numerical.\n#### How many rows and columns does the dataset have\n#### The data types for the various features\n#### Checked 
whether the dataset has null or missing values"},{"metadata":{"trusted":true},"cell_type":"code","source":"#Check the dimensions to the number of rows and columns\ndata.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.columns","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.dtypes","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\ndata.describe()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#number of null values in each column\ndata.isnull().sum()\n#since my data has no null values then its good to go","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"#### needed to know how balanced the class values are"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.groupby('CLASS').size().plot(kind='bar')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### Its a good idea to review all the pairwise correlations of the attributes in the dataset because some machine learning algorithm like linear and logistic regression can suffer poor performance if there are highly correlated attributes in the dataset"},{"metadata":{"trusted":true},"cell_type":"code","source":"data.corr(method='pearson')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### heat map to show the correlation of the data; plots that show the interactions between multiple variables in the dataset\n#### Correlation gives an indication of how related the changes are between two variables. If two variables change in the same direction they are positively correlated. 
If they change in opposite directions together (one goes up, one goes down), then they are negatively correlated. "},{"metadata":{"trusted":true},"cell_type":"code","source":"plt.figure(figsize=(8,8))\nsns.heatmap(data.corr(method='pearson'))\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### Also checked the correlation with respect to the class, since I am trying to build an ML algorithm for that class"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.corr(method='pearson')['CLASS']","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### Most of my variables are positively skewed"},{"metadata":{"trusted":true},"cell_type":"code","source":" data.skew().plot(kind='bar')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Understanding data with visualization\n#### Data can be visualised in many ways, that is, with univariate plots and multivariate plots\n#### Used the histogram as the univariate plot, as shown below, and the correlation matrix plot as the multivariate plot, as shown above"},{"metadata":{},"cell_type":"markdown","source":"## Histogram\n#### This helps to understand each attribute of my dataset independently"},{"metadata":{},"cell_type":"markdown","source":"## Data pre-processing"},{"metadata":{"trusted":true},"cell_type":"code","source":"# DataFrame.hist creates its own figure, so the size is passed to it directly\n# (a separate plt.figure() call would only leave an empty figure behind).\ndata.hist(figsize=(18,18))\nplt.tight_layout()\nplt.show()","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Standardize data\n#### Standardization is a useful technique to transform attributes with a Gaussian distribution and differing means and standard deviations to a standard Gaussian distribution with a mean of 0 and a standard deviation of 1"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.preprocessing import StandardScaler\narray = data.values\n#separate array into input and output components\nX = 
array[:,0:11]\nY = array[:,11]\nscaler = StandardScaler().fit(X)\nrescaledX = scaler.transform(X)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledX[0:5,:])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Scale the test set with the scaler fitted on the training features in the cell above.\n# Fitting a second scaler on the test set would put train and test on different scales.\nrescaledt = scaler.transform(test.values)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledt[0:5,:])","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Feature selection\n\n"},{"metadata":{},"cell_type":"markdown","source":"#### It's the process of selecting a subset of relevant features for use in model construction"},{"metadata":{},"cell_type":"markdown","source":"### Chose Recursive Feature Elimination\n#### This is an automatic feature selection technique\n#### Used logistic regression because it is a good baseline: it is fast to train and predict, and it scales well.\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.feature_selection import RFE\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n# feature extraction\nmodel = LogisticRegression()\n# n_features_to_select must be passed by keyword in current scikit-learn\nrfe = RFE(model, n_features_to_select=8)\nfit = rfe.fit(X,Y)\nprint(\"Num Features:\", fit.n_features_)\nprint(\"Selected Features:\", fit.support_)\nprint(\"Feature Ranking:\", fit.ranking_)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"X[:,fit.support_]","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop=data.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop_test = test.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop_test","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"1. 
#### Decided to first use all the first\n"},{"metadata":{},"cell_type":"markdown","source":"# Evaluate the Performance of Machine Learning Algorithms with Resampling¶\n"},{"metadata":{},"cell_type":"markdown","source":"#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers."},{"metadata":{},"cell_type":"markdown","source":"## Split into Train and Test Sets"},{"metadata":{},"cell_type":"markdown","source":"#### This algorithm evaluation technique is very fast. It is ideal for large datasets where there is strong evidence that both splits of the data are representative of the underlying problem. Because of the speed, it is useful to use this approach when the algorithm you are investigating is slow to train.\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\ntest_size = 0.30\nseed = 7\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\nrandom_state=seed)\nmodel = LogisticRegression()\nmodel.fit(X_train, Y_train)\nresult = model.score(X_test, Y_test)\nprint(\"Accuracy: \", (result*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport = pd.DataFrame(output)\nreport.columns = ['CLASS']\nreport.index.name = \"Index\"\nreport['CLASS']=report['CLASS'].map({0.0:False, 1.0:True})\nreport.to_csv(\"report.csv\")\n\nprint(report['CLASS'].unique())\nprint('False: ',report.groupby('CLASS').size()[0].sum())\nprint('True: ',report.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## K-fold Cross Validation"},{"metadata":{},"cell_type":"markdown","source":"#### It is more accurate because 
the algorithm is trained and evaluated multiple times on different data."},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10 #number of folds to use\nseed = 7 #reproducibility\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_kf = pd.DataFrame(output)\nreport_kf.columns = ['CLASS']\nreport_kf.index.name = \"Index\"\nreport_kf['CLASS']=report_kf['CLASS'].map({0.0:False, 1.0:True})\nreport_kf.to_csv(\"report_kf.csv\")\n\nprint(report_kf['CLASS'].unique())\nprint('False: ',report_kf.groupby('CLASS').size()[0].sum())\nprint('True: ',report_kf.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Leave One Out Cross Validation"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import LeaveOneOut\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10\nloocv = LeaveOneOut()\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=loocv)\nprint(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_l = pd.DataFrame(output)\nreport_l.columns = ['CLASS']\nreport_l.index.name = \"Index\"\nreport_l['CLASS']=report_l['CLASS'].map({0.0:False, 1.0:True})\nreport_l.to_csv(\"report_l.csv\")\n\nprint(report_l['CLASS'].unique())\nprint('False: ',report_l.groupby('CLASS').size()[0].sum())\nprint('True: 
',report_l.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Repeated Random Test-Train Splits"},{"metadata":{},"cell_type":"markdown","source":"#### Creates a random split of the data like the train/test split , but repeats the process of splitting and evaluation of the algorithm multiple times, like cross validation. Repeated random splits can be useful intermediates when trying to balance variance in the estimated performance, model training speed and dataset size\n#### In this I prefered using Repeated Random Test_Train Splits because when you look at the dataset the zeros are one side and the ones on the otherside in the 'class' column. So I would prefer to first shuffle the data and then split it to reduce on the bias"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import cross_val_score\n\nn_splits = 10\ntest_size = 0.30\nseed = 7\nkfold = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(\"Accuracy: \" , (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rrt = pd.DataFrame(output)\nreport_rrt.columns = ['CLASS']\nreport_rrt.index.name = \"Index\"\nreport_rrt['CLASS']=report_rrt['CLASS'].map({0.0:False, 1.0:True})\nreport_rrt.to_csv(\"report_rrt.csv\")\n\nprint(report_rrt['CLASS'].unique())\nprint('False: ',report_rrt.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rrt.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# Machine Learning Algorithm Performance Metrics"},{"metadata":{},"cell_type":"markdown","source":"## Algorithms 
Overview\n### Linear machine learning algorithms:\n\n Logistic Regression.\n Linear Discriminant Analysis.\n### Nonlinear machine learning algorithms\n\n k-Nearest Neighbors.\n Naive Bayes.\n Classification and Regression Trees.\n Support Vector Machines.\n"},{"metadata":{},"cell_type":"markdown","source":"## Linear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"### Logistic Regression"},{"metadata":{},"cell_type":"markdown","source":"#### Logistic regression is best suited for binary classification: data sets where y = 0 or 1"},{"metadata":{},"cell_type":"markdown","source":"### Using standardized data"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic regression on standardized data\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\nnum_folds = 10\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)\n# The scaler is part of the pipeline, so cross-validation, fitting and prediction all see\n# standardized features, and the test set is scaled with the statistics of the training data.\nmodel = make_pipeline(StandardScaler(), LogisticRegression())\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_scaled = pd.DataFrame(output)\nreport_scaled.columns = ['CLASS']\nreport_scaled.index.name = \"Index\"\nreport_scaled['CLASS']=report_scaled['CLASS'].map({0.0:False, 1.0:True})\nreport_scaled.to_csv(\"report_scaled.csv\")\n\nprint(report_scaled['CLASS'].unique())\nprint('False: ',report_scaled.groupby('CLASS').size()[0].sum())\nprint('True: ',report_scaled.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic Regression Classification\n\nnum_folds = 10\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nmy_report = 
pd.DataFrame(output)\nmy_report.columns = ['CLASS']\nmy_report.index.name = \"Index\"\nmy_report['CLASS']=my_report['CLASS'].map({0.0:False, 1.0:True})\n# this is the logistic-regression report, so it is no longer written under an XGB file name\nmy_report.to_csv(\"report_logreg.csv\")\n\nprint(my_report['CLASS'].unique())\nprint('False: ',my_report.groupby('CLASS').size()[0].sum())\nprint('True: ',my_report.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Linear Discriminant Analysis\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n\nnum_folds = 10\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)\nmodel = LinearDiscriminantAnalysis()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nlda_report = pd.DataFrame(output)\nlda_report.columns = ['CLASS']\nlda_report.index.name = \"Index\"\nlda_report['CLASS']=lda_report['CLASS'].map({0.0:False, 1.0:True})\nlda_report.to_csv(\"ldareport.csv\")\n\nprint(lda_report['CLASS'].unique())\nprint('False: ',lda_report.groupby('CLASS').size()[0].sum())\nprint('True: ',lda_report.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Nonlinear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"### k-Nearest Neighbors"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.neighbors import KNeighborsClassifier\nnum_folds = 10\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)\nmodel = KNeighborsClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = 
matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_k = pd.DataFrame(output)\nreport_k.columns = ['CLASS']\nreport_k.index.name = \"Index\"\nreport_k['CLASS']=report_k['CLASS'].map({0.0:False, 1.0:True})\nreport_k.to_csv(\"report_k.csv\")\n\n\nprint(report_k['CLASS'].unique())\nprint('False: ',report_k.groupby('CLASS').size()[0].sum())\nprint('True: ',report_k.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Naive Bayes"},{"metadata":{},"cell_type":"markdown","source":"### Tried using Standardised data on Naive Bayes\n\n### When I predicted Naive Bayes on Standardised data gave me a score of 0.98235, after feature selection it gave 0.90 and on unstandardised data it gave a score of 0.9959"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on standardised data\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\n# The scaler is part of the pipeline, so cross-validation, fitting and prediction all see\n# standardised features, and the test set is scaled with the statistics of the training data.\nmodel = make_pipeline(StandardScaler(), GaussianNB())\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rebayes = pd.DataFrame(output)\nreport_rebayes.columns = ['CLASS']\nreport_rebayes.index.name = \"Index\"\nreport_rebayes['CLASS']=report_rebayes['CLASS'].map({0.0:False, 1.0:True})\nreport_rebayes.to_csv(\"report_rebayes.csv\")\n\n\nprint(report_rebayes['CLASS'].unique())\nprint('False: ',report_rebayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rebayes.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Naive Bayes on selected features"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on selected features\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n\nselectedX = X[:,fit.support_]\n\narray2 
=test.values\nselectedT = array2[:,fit.support_]\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, selectedX, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(selectedX,Y)\noutput = model.predict(selectedT)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(selectedX),Y)\nprint('MCC:',mcc)\n \nreport_sel = pd.DataFrame(output)\nreport_sel.columns = ['CLASS']\nreport_sel.index.name = \"Index\"\nreport_sel['CLASS']=report_sel['CLASS'].map({0.0:False, 1.0:True})\nreport_sel.to_csv(\"report_sel.csv\")\n\n\nprint(report_sel['CLASS'].unique())\nprint('False: ',report_sel.groupby('CLASS').size()[0].sum())\nprint('True: ',report_sel.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.naive_bayes import GaussianNB\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bayes = pd.DataFrame(output)\nreport_bayes.columns = ['CLASS']\nreport_bayes.index.name = \"Index\"\nreport_bayes['CLASS']=report_bayes['CLASS'].map({0.0:False, 1.0:True})\nreport_bayes.to_csv(\"report_bayes.csv\")\n\n\nprint(report_bayes['CLASS'].unique())\nprint('False: ',report_bayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bayes.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Classification and Regression Trees"},{"metadata":{},"cell_type":"markdown","source":"#### Used for classification or regression predictive modeling problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.tree import DecisionTreeClassifier\nkfold = KFold(n_splits=10, 
shuffle=True, random_state=7)\nmodel = DecisionTreeClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_tree = pd.DataFrame(output)\nreport_tree.columns = ['CLASS']\nreport_tree.index.name = \"Index\"\nreport_tree['CLASS']=report_tree['CLASS'].map({0.0:False, 1.0:True})\nreport_tree.to_csv(\"report_tree.csv\")\n\n\nprint(report_tree['CLASS'].unique())\nprint('False: ',report_tree.groupby('CLASS').size()[0].sum())\nprint('True: ',report_tree.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Support Vector Machines "},{"metadata":{},"cell_type":"markdown","source":"#### A support vector machine (SVM) is a supervised machine learning model that uses classification algorithms for two-group classification problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.svm import SVC\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = SVC()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_svm = pd.DataFrame(output)\nreport_svm.columns = ['CLASS']\nreport_svm.index.name = \"Index\"\nreport_svm['CLASS']=report_svm['CLASS'].map({0.0:False, 1.0:True})\nreport_svm.to_csv(\"report_svm.csv\")\n\n\nprint(report_svm['CLASS'].unique())\nprint('False: ',report_svm.groupby('CLASS').size()[0].sum())\nprint('True: ',report_svm.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# Combine Models Into Ensemble Predictions\n\nThe three most popular methods for combining the predictions from different models are:\n \n Bagging\n 
Boosting\n Voting"},{"metadata":{},"cell_type":"markdown","source":"> # Boosting Algorithms"},{"metadata":{},"cell_type":"markdown","source":"#### These seek to improve the prediction power by training a sequence of weak models, each compensating for the weaknesses of its predecessors.\n"},{"metadata":{},"cell_type":"markdown","source":"## AdaBoost"},{"metadata":{},"cell_type":"markdown","source":"#### This is specifically designed for classification problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"# AdaBoost Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 39\nseed=10\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)\n\nmodel = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_ada = pd.DataFrame(output)\nreport_ada.columns = ['CLASS']\nreport_ada.index.name = \"Index\"\nreport_ada['CLASS']=report_ada['CLASS'].map({0.0:False, 1.0:True})\nreport_ada.to_csv(\"report_ada.csv\")\n\n\nprint(report_ada['CLASS'].unique())\nprint('False: ',report_ada.groupby('CLASS').size()[0].sum())\nprint('True: ',report_ada.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Bagging Algorithms"},{"metadata":{},"cell_type":"markdown","source":"#### Bagging is often used with decision trees, where it makes models more stable by reducing variance and improving accuracy, which helps to reduce overfitting."},{"metadata":{},"cell_type":"markdown","source":"## Bagged Decision 
Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Bagged Decision Trees for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import BaggingClassifier\nfrom sklearn.tree import DecisionTreeClassifier\n\n#split the data in portions\nX = array[:,0:11]\nY = array[:,11]\nseed = 7 #reproducibility\n\n#split according to cross validation\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)\n\n#initialize the model\ncart = DecisionTreeClassifier()\n\n#bagging\nnum_trees = 250\n\n#model\n# the base estimator is passed positionally because its keyword was renamed\n# from base_estimator to estimator in newer scikit-learn releases\nmodel = BaggingClassifier(cart, n_estimators=num_trees, random_state=seed)\n\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bag = pd.DataFrame(output)\nreport_bag.columns = ['CLASS']\nreport_bag.index.name = \"Index\"\nreport_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\nreport_bag.to_csv(\"report_bag.csv\")\n\n\nprint(report_bag['CLASS'].unique())\nprint('False: ',report_bag.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bag.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Random Forest"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Random Forest Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 1000\n\nmax_features = 3\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput 
= model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rf = pd.DataFrame(output)\nreport_rf.columns = ['CLASS']\nreport_rf.index.name = \"Index\"\nreport_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\nreport_rf.to_csv(\"report_rf.csv\")\n\n\nprint(report_rf['CLASS'].unique())\nprint('False: ',report_rf.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rf.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Extra Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.ensemble import ExtraTreesClassifier\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 100\nmax_features = 7\n\n# shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\n\nmodel = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features)\n\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(results.mean())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Voting Ensemble"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Voting Ensemble for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.tree import DecisionTreeClassifier\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import VotingClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\n\n# create the sub models\nestimators = []\nmodel1 = LogisticRegression()\nestimators.append(('logistic', model1))\n\nmodel2 = DecisionTreeClassifier()\nestimators.append(('cart', model2))\n\nmodel3 = SVC()\nestimators.append(('svm', model3))\n\nmodel4 = 
XGBClassifier()\nestimators.append(('xgb', model4))\n\nmodel5 = RandomForestClassifier()\nestimators.append(('rfc', model5))\n\n# create the ensemble model\nensemble = VotingClassifier(estimators)\nresults = cross_val_score(ensemble, X, Y, cv=kfold)\nprint(results.mean())\n\n\n# fit and predict with the voting ensemble itself, not with the model left over from an earlier cell\nensemble.fit(X,Y)\noutput = ensemble.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(ensemble.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_v = pd.DataFrame(output)\nreport_v.columns = ['CLASS']\nreport_v.index.name = \"Index\"\nreport_v['CLASS']=report_v['CLASS'].map({0.0:False, 1.0:True})\nreport_v.to_csv(\"report_v.csv\")\n\n\nprint(report_v['CLASS'].unique())\nprint('False: ',report_v.groupby('CLASS').size()[0].sum())\nprint('True: ',report_v.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Comparing the algorithms"},{"metadata":{"trusted":true},"cell_type":"code","source":"\n# prepare models and add them to a list\nfrom matplotlib import pyplot\n\nmodels = []\nmodels.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\nmodels.append(('LDA', LinearDiscriminantAnalysis()))\nmodels.append(('KNN', KNeighborsClassifier()))\nmodels.append(('CART', DecisionTreeClassifier()))\nmodels.append(('NB', GaussianNB()))\nmodels.append(('SVM', SVC(gamma='auto')))\nmodels.append(('ETC', ExtraTreesClassifier()))\nmodels.append(('RFC', RandomForestClassifier()))\n\n# evaluate each model in turn\nresults = []\nnames = []\nscoring = 'accuracy'\n\n# one shuffled, seeded splitter shared by every model so they are scored on identical folds\n# (shuffle=True: the rows are ordered by class, and random_state only takes effect when shuffling)\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nfor name, model in models:\n cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n results.append(cv_results)\n names.append(name)\n msg = (name, cv_results.mean(), cv_results.std())\n print(msg)\n\n# boxplot algorithm comparison\nfig = pyplot.figure()\nfig.suptitle('Algorithm Comparison')\nax = 
fig.add_subplot(111)\npyplot.boxplot(results)\nax.set_xticklabels(names)\npyplot.show()","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''"},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":""}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4} \ No newline at end of file diff --git a/Assignment Colab/report_bayes.csv b/Assignment Colab/report_bayes.csv new file mode 100644 index 0000000..d5d0b48 --- /dev/null +++ b/Assignment Colab/report_bayes.csv @@ -0,0 +1,759 @@ +Index,CLASS +0,True +1,True +2,True +3,True +4,False +5,True +6,True +7,True +8,True +9,True +10,True +11,True +12,True +13,False +14,False +15,True +16,False +17,True +18,True +19,True +20,True +21,True +22,True +23,True +24,False +25,True +26,True +27,True +28,True +29,False +30,True +31,True +32,False +33,True +34,False +35,True +36,True +37,True +38,True +39,True +40,True +41,True +42,True +43,True +44,True +45,False +46,True +47,True +48,False +49,True +50,True +51,False +52,False +53,True +54,False +55,True +56,True +57,True +58,True +59,True +60,True +61,False +62,True +63,True +64,True +65,True +66,True +67,True +68,True +69,True +70,True +71,True +72,True +73,True +74,True +75,True +76,False +77,True +78,True +79,True +80,True +81,True +82,True +83,True +84,True +85,False +86,False +87,True +88,True +89,True +90,False +91,True +92,True +93,True +94,True +95,False +96,False +97,True +98,True +99,True +100,False +101,True +102,True +103,True +104,True +105,True +106,True +107,True +108,True +109,True +110,True +111,True +112,True +113,False +114,True +115,True 
+116,True +117,True +118,True +119,True +120,True +121,True +122,True +123,True +124,True +125,True +126,True +127,True +128,True +129,True +130,True +131,True +132,False +133,True +134,True +135,True +136,True +137,True +138,True +139,True +140,True +141,True +142,True +143,True +144,True +145,True +146,True +147,True +148,True +149,True +150,True +151,True +152,False +153,True +154,True +155,True +156,True +157,True +158,True +159,True +160,True +161,False +162,True +163,True +164,True +165,True +166,True +167,True +168,True +169,False +170,True +171,True +172,True +173,True +174,True +175,True +176,True +177,True +178,True +179,True +180,True +181,True +182,True +183,True +184,True +185,True +186,True +187,True +188,True +189,True +190,True +191,True +192,True +193,True +194,True +195,True +196,True +197,True +198,False +199,True +200,True +201,True +202,True +203,True +204,True +205,True +206,True +207,True +208,True +209,True +210,True +211,True +212,True +213,True +214,True +215,True +216,True +217,True +218,True +219,False +220,True +221,True +222,True +223,True +224,True +225,True +226,True +227,True +228,True +229,True +230,True +231,True +232,True +233,True +234,True +235,True +236,True +237,True +238,True +239,True +240,True +241,True +242,True +243,False +244,True +245,True +246,True +247,True +248,True +249,True +250,True +251,True +252,True +253,True +254,True +255,True +256,True +257,True +258,True +259,False +260,True +261,True +262,True +263,True +264,True +265,True +266,True +267,True +268,True +269,True +270,True +271,True +272,True +273,True +274,True +275,True +276,True +277,True +278,True +279,True +280,True +281,True +282,True +283,True +284,True +285,True +286,True +287,True +288,True +289,True +290,True +291,True +292,True +293,True +294,True +295,True +296,False +297,True +298,True +299,False +300,True +301,True +302,True +303,True +304,True +305,True +306,True +307,True +308,True +309,True +310,True +311,True +312,True +313,True +314,True 
+315,True +316,True +317,True +318,True +319,True +320,True +321,True +322,True +323,True +324,True +325,True +326,True +327,True +328,True +329,True +330,True +331,True +332,True +333,True +334,True +335,True +336,True +337,True +338,True +339,True +340,True +341,True +342,True +343,True +344,True +345,True +346,True +347,True +348,True +349,True +350,True +351,True +352,True +353,False +354,True +355,True +356,True +357,True +358,True +359,True +360,True +361,True +362,True +363,True +364,True +365,True +366,True +367,True +368,True +369,True +370,True +371,True +372,True +373,True +374,True +375,True +376,True +377,True +378,True +379,False +380,False +381,False +382,False +383,False +384,False +385,False +386,False +387,False +388,False +389,False +390,False +391,False +392,False +393,False +394,False +395,False +396,False +397,False +398,False +399,False +400,False +401,False +402,True +403,False +404,False +405,True +406,False +407,False +408,False +409,False +410,False +411,True +412,False +413,False +414,False +415,False +416,False +417,False +418,False +419,False +420,True +421,False +422,False +423,False +424,False +425,False +426,False +427,False +428,False +429,False +430,False +431,False +432,False +433,False +434,False +435,False +436,False +437,False +438,False +439,False +440,False +441,False +442,False +443,False +444,False +445,False +446,False +447,False +448,False +449,False +450,True +451,False +452,False +453,False +454,False +455,True +456,False +457,False +458,False +459,False +460,False +461,False +462,False +463,False +464,False +465,False +466,False +467,False +468,False +469,False +470,True +471,False +472,False +473,False +474,False +475,False +476,False +477,False +478,False +479,False +480,False +481,False +482,False +483,False +484,False +485,False +486,False +487,False +488,False +489,False +490,False +491,False +492,False +493,False +494,True +495,True +496,False +497,False +498,False +499,False +500,False +501,False +502,False 
+503,False +504,False +505,False +506,False +507,False +508,True +509,False +510,False +511,False +512,False +513,False +514,False +515,False +516,False +517,False +518,False +519,False +520,True +521,False +522,False +523,False +524,False +525,False +526,False +527,False +528,False +529,False +530,False +531,False +532,False +533,False +534,False +535,False +536,False +537,False +538,False +539,False +540,False +541,False +542,False +543,True +544,False +545,False +546,False +547,False +548,False +549,False +550,True +551,False +552,False +553,False +554,True +555,True +556,False +557,True +558,False +559,True +560,False +561,False +562,False +563,False +564,False +565,False +566,False +567,False +568,False +569,False +570,False +571,False +572,False +573,False +574,False +575,False +576,False +577,False +578,False +579,False +580,False +581,False +582,False +583,False +584,False +585,False +586,False +587,True +588,False +589,False +590,False +591,True +592,False +593,False +594,False +595,True +596,False +597,False +598,False +599,False +600,False +601,False +602,False +603,False +604,False +605,False +606,False +607,False +608,False +609,False +610,False +611,False +612,False +613,False +614,False +615,False +616,False +617,False +618,False +619,False +620,False +621,False +622,False +623,False +624,False +625,False +626,False +627,False +628,True +629,False +630,False +631,False +632,False +633,True +634,True +635,False +636,True +637,True +638,True +639,False +640,False +641,False +642,False +643,False +644,False +645,True +646,False +647,True +648,False +649,False +650,False +651,False +652,False +653,False +654,False +655,False +656,True +657,False +658,False +659,False +660,False +661,False +662,False +663,False +664,False +665,False +666,False +667,False +668,False +669,False +670,True +671,False +672,False +673,True +674,False +675,False +676,False +677,False +678,False +679,False +680,False +681,False +682,False +683,False +684,False +685,False 
+686,True +687,True +688,False +689,False +690,False +691,False +692,False +693,False +694,False +695,False +696,False +697,False +698,False +699,False +700,False +701,False +702,False +703,False +704,False +705,False +706,True +707,False +708,True +709,False +710,False +711,False +712,False +713,False +714,False +715,False +716,False +717,True +718,False +719,True +720,False +721,False +722,False +723,False +724,False +725,False +726,False +727,False +728,True +729,False +730,False +731,False +732,False +733,False +734,False +735,False +736,False +737,False +738,False +739,False +740,True +741,True +742,False +743,True +744,False +745,True +746,False +747,False +748,False +749,False +750,False +751,False +752,False +753,False +754,False +755,False +756,False +757,False From 8a0c1cd8afc1d520f6146c61efc5ab2d52f41a39 Mon Sep 17 00:00:00 2001 From: Atwine Date: Sat, 14 Mar 2020 15:12:45 +0300 Subject: [PATCH 2/4] make corrections --- .../KAYAGA NASHIM MILVAT-checkpoint.ipynb | 3015 ++++++++++++++++ Assignment Colab/KAYAGA NASHIM MILVAT.ipynb | 3016 ++++++++++++++++- 2 files changed, 6030 insertions(+), 1 deletion(-) create mode 100644 Assignment Colab/.ipynb_checkpoints/KAYAGA NASHIM MILVAT-checkpoint.ipynb diff --git a/Assignment Colab/.ipynb_checkpoints/KAYAGA NASHIM MILVAT-checkpoint.ipynb b/Assignment Colab/.ipynb_checkpoints/KAYAGA NASHIM MILVAT-checkpoint.ipynb new file mode 100644 index 0000000..36296d9 --- /dev/null +++ b/Assignment Colab/.ipynb_checkpoints/KAYAGA NASHIM MILVAT-checkpoint.ipynb @@ -0,0 +1,3015 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" + }, + "outputs": [], + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n", + "# For example, here's several 
helpful packages to load in \n", + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Input data files are available in the \"../input/\" directory.\n", + "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", + "\n", + "import os\n", + "#for dirname, _, filenames in os.walk('/kaggle/input'):\n", + "# for filename in filenames:\n", + "# print(os.path.join(dirname, filename))\n", + " \n", + "\n", + "# Any results you write to the current directory are saved as output." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References\n", + "#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n", + "#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n", + "#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n", + "#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2\n", + "\n", + "\n", + "
\n", + " Please put these at the bottom." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", + "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104
04.03.7040.87373.5190.987-4.83300.38274.5567.2251.234
14.04.4440.89262.4440.931-0.58400.32056.0564.9421.853
22.00.0000.90147.0001.039-5.66400.16447.0005.9691.174
34.50.0000.86969.2220.982-5.42302.01069.2225.4621.138
4-4.021.5911.06171.6820.976-2.00202.75866.0005.5821.453
54.56.9770.89568.5120.950-1.87803.09072.0005.7791.844
612.03.1751.02274.4601.010-3.22503.17276.7225.6641.215
71.53.7040.93269.5190.977-2.50902.54372.0004.2511.560
83.03.3330.90359.5000.963-1.68202.99066.0005.1751.514
94.00.0000.87372.7920.998-4.94302.98577.4445.6261.621
1011.08.2190.92775.0680.989-3.11803.49376.3896.0471.126
114.52.7030.96669.7570.972-3.89613.71476.4445.4921.445
120.011.5381.02777.9230.981-3.95413.67978.0567.2221.054
136.03.3331.11479.1001.024-2.43703.98875.5566.6671.079
140.00.0001.00578.9711.1021.54404.14378.5564.4721.280
153.06.0610.89776.4550.955-4.03214.31081.2226.2071.506
163.02.1280.88964.0641.0000.58304.09774.6675.0971.302
179.50.0000.78655.6470.955-0.57703.81661.6674.8292.026
181.512.0000.94870.7200.956-3.55913.98266.5007.0241.050
194.00.0000.82854.8751.048-2.85304.29354.8755.2291.651
205.03.1250.90169.5940.995-1.67703.87658.5005.2781.486
211.50.0000.87267.2000.972-5.39204.47167.2006.5240.986
222.00.0000.78664.1500.969-4.70603.92963.7226.9411.040
238.00.0000.93362.8001.008-4.17004.10462.6674.8701.547
243.010.8111.08677.1081.010-2.11204.20878.0565.7481.249
254.02.9410.90071.2060.967-3.96304.12672.6116.8281.507
2610.00.0000.84569.5110.975-2.04903.97770.7224.8911.543
275.50.0000.94671.6671.023-4.98203.43971.6675.9531.108
289.00.0000.82254.5450.993-3.95503.56255.5006.0861.114
29-6.038.2351.23983.5591.0020.10813.31791.5005.7231.055
....................................
7281.04.1670.93167.8330.936-0.775014.99368.6676.0471.125
7291.07.4070.93081.2590.985-2.859014.99380.7786.0121.051
7300.025.0001.11979.6671.023-0.379014.98677.6676.0421.137
731-2.017.5001.06884.1000.995-1.328014.94390.0006.0811.484
7321.04.5450.83085.0910.996-3.729014.98285.8335.6121.165
7331.011.5381.05070.8851.015-2.530014.98175.7225.3371.327
7340.013.3330.92572.5330.993-2.093018.27972.5335.5251.147
7350.511.1111.13580.7781.034-1.448014.95180.7785.4111.079
736-0.513.3330.95775.2000.969-1.257017.79975.2005.9391.150
7375.010.2040.94778.1840.986-3.313015.30077.1116.2391.050
738-1.019.2311.06591.0771.0040.520015.21495.6115.3691.276
739-1.517.6471.11982.1471.005-1.477015.32181.3335.7711.016
7406.53.7040.90976.1111.009-2.840015.66679.2226.0231.242
7415.00.0000.83680.0970.973-2.795015.11082.6115.9231.245
742-3.513.3331.11575.2891.027-1.292015.05880.3895.8091.300
7434.08.3330.92879.0000.960-3.607015.09180.9445.4171.316
74410.517.0001.09182.1601.003-2.414015.27074.9445.6011.053
7455.512.8210.99667.5130.991-4.674014.98967.9446.9281.166
746-2.019.0481.03085.3330.969-1.162015.45886.5006.2841.156
747-2.520.5131.09982.7951.014-0.965015.71586.3335.5891.127
748-2.025.0001.17886.4171.0131.322022.98986.4175.5841.228
749-4.021.8751.02166.1880.994-0.523015.49971.2225.9391.080
7502.00.0001.05486.4621.051-0.815020.86686.4625.7121.298
751-2.014.8941.04283.2550.978-1.303015.26085.1675.7181.164
752-8.028.2611.22182.1091.014-0.153015.35581.1676.9040.933
753-1.516.0001.10082.8200.991-1.987015.18585.3337.0531.325
754-1.018.1821.08573.4551.027-0.745016.55074.6676.7291.132
755-1.019.0481.10882.1901.033-1.789016.11279.6676.0361.219
756-1.07.1430.95576.7861.0231.141020.63076.7865.6691.111
757-7.017.1431.07884.1861.009-0.066017.16876.6116.6881.305
\n", + "

758 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", + "0 4.0 3.704 0.873 73.519 \n", + "1 4.0 4.444 0.892 62.444 \n", + "2 2.0 0.000 0.901 47.000 \n", + "3 4.5 0.000 0.869 69.222 \n", + "4 -4.0 21.591 1.061 71.682 \n", + "5 4.5 6.977 0.895 68.512 \n", + "6 12.0 3.175 1.022 74.460 \n", + "7 1.5 3.704 0.932 69.519 \n", + "8 3.0 3.333 0.903 59.500 \n", + "9 4.0 0.000 0.873 72.792 \n", + "10 11.0 8.219 0.927 75.068 \n", + "11 4.5 2.703 0.966 69.757 \n", + "12 0.0 11.538 1.027 77.923 \n", + "13 6.0 3.333 1.114 79.100 \n", + "14 0.0 0.000 1.005 78.971 \n", + "15 3.0 6.061 0.897 76.455 \n", + "16 3.0 2.128 0.889 64.064 \n", + "17 9.5 0.000 0.786 55.647 \n", + "18 1.5 12.000 0.948 70.720 \n", + "19 4.0 0.000 0.828 54.875 \n", + "20 5.0 3.125 0.901 69.594 \n", + "21 1.5 0.000 0.872 67.200 \n", + "22 2.0 0.000 0.786 64.150 \n", + "23 8.0 0.000 0.933 62.800 \n", + "24 3.0 10.811 1.086 77.108 \n", + "25 4.0 2.941 0.900 71.206 \n", + "26 10.0 0.000 0.845 69.511 \n", + "27 5.5 0.000 0.946 71.667 \n", + "28 9.0 0.000 0.822 54.545 \n", + "29 -6.0 38.235 1.239 83.559 \n", + ".. ... ... ... ... 
\n", + "728 1.0 4.167 0.931 67.833 \n", + "729 1.0 7.407 0.930 81.259 \n", + "730 0.0 25.000 1.119 79.667 \n", + "731 -2.0 17.500 1.068 84.100 \n", + "732 1.0 4.545 0.830 85.091 \n", + "733 1.0 11.538 1.050 70.885 \n", + "734 0.0 13.333 0.925 72.533 \n", + "735 0.5 11.111 1.135 80.778 \n", + "736 -0.5 13.333 0.957 75.200 \n", + "737 5.0 10.204 0.947 78.184 \n", + "738 -1.0 19.231 1.065 91.077 \n", + "739 -1.5 17.647 1.119 82.147 \n", + "740 6.5 3.704 0.909 76.111 \n", + "741 5.0 0.000 0.836 80.097 \n", + "742 -3.5 13.333 1.115 75.289 \n", + "743 4.0 8.333 0.928 79.000 \n", + "744 10.5 17.000 1.091 82.160 \n", + "745 5.5 12.821 0.996 67.513 \n", + "746 -2.0 19.048 1.030 85.333 \n", + "747 -2.5 20.513 1.099 82.795 \n", + "748 -2.0 25.000 1.178 86.417 \n", + "749 -4.0 21.875 1.021 66.188 \n", + "750 2.0 0.000 1.054 86.462 \n", + "751 -2.0 14.894 1.042 83.255 \n", + "752 -8.0 28.261 1.221 82.109 \n", + "753 -1.5 16.000 1.100 82.820 \n", + "754 -1.0 18.182 1.085 73.455 \n", + "755 -1.0 19.048 1.108 82.190 \n", + "756 -1.0 7.143 0.955 76.786 \n", + "757 -7.0 17.143 1.078 84.186 \n", + "\n", + " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", + "0 0.987 -4.833 0 0.382 \n", + "1 0.931 -0.584 0 0.320 \n", + "2 1.039 -5.664 0 0.164 \n", + "3 0.982 -5.423 0 2.010 \n", + "4 0.976 -2.002 0 2.758 \n", + "5 0.950 -1.878 0 3.090 \n", + "6 1.010 -3.225 0 3.172 \n", + "7 0.977 -2.509 0 2.543 \n", + "8 0.963 -1.682 0 2.990 \n", + "9 0.998 -4.943 0 2.985 \n", + "10 0.989 -3.118 0 3.493 \n", + "11 0.972 -3.896 1 3.714 \n", + "12 0.981 -3.954 1 3.679 \n", + "13 1.024 -2.437 0 3.988 \n", + "14 1.102 1.544 0 4.143 \n", + "15 0.955 -4.032 1 4.310 \n", + "16 1.000 0.583 0 4.097 \n", + "17 0.955 -0.577 0 3.816 \n", + "18 0.956 -3.559 1 3.982 \n", + "19 1.048 -2.853 0 4.293 \n", + "20 0.995 -1.677 0 3.876 \n", + "21 0.972 -5.392 0 4.471 \n", + "22 0.969 -4.706 0 3.929 \n", + "23 1.008 -4.170 0 4.104 \n", + "24 1.010 -2.112 0 4.208 \n", + "25 0.967 -3.963 0 4.126 \n", + "26 
0.975 -2.049 0 3.977 \n", + "27 1.023 -4.982 0 3.439 \n", + "28 0.993 -3.955 0 3.562 \n", + "29 1.002 0.108 1 3.317 \n", + ".. ... ... ... ... \n", + "728 0.936 -0.775 0 14.993 \n", + "729 0.985 -2.859 0 14.993 \n", + "730 1.023 -0.379 0 14.986 \n", + "731 0.995 -1.328 0 14.943 \n", + "732 0.996 -3.729 0 14.982 \n", + "733 1.015 -2.530 0 14.981 \n", + "734 0.993 -2.093 0 18.279 \n", + "735 1.034 -1.448 0 14.951 \n", + "736 0.969 -1.257 0 17.799 \n", + "737 0.986 -3.313 0 15.300 \n", + "738 1.004 0.520 0 15.214 \n", + "739 1.005 -1.477 0 15.321 \n", + "740 1.009 -2.840 0 15.666 \n", + "741 0.973 -2.795 0 15.110 \n", + "742 1.027 -1.292 0 15.058 \n", + "743 0.960 -3.607 0 15.091 \n", + "744 1.003 -2.414 0 15.270 \n", + "745 0.991 -4.674 0 14.989 \n", + "746 0.969 -1.162 0 15.458 \n", + "747 1.014 -0.965 0 15.715 \n", + "748 1.013 1.322 0 22.989 \n", + "749 0.994 -0.523 0 15.499 \n", + "750 1.051 -0.815 0 20.866 \n", + "751 0.978 -1.303 0 15.260 \n", + "752 1.014 -0.153 0 15.355 \n", + "753 0.991 -1.987 0 15.185 \n", + "754 1.027 -0.745 0 16.550 \n", + "755 1.033 -1.789 0 16.112 \n", + "756 1.023 1.141 0 20.630 \n", + "757 1.009 -0.066 0 17.168 \n", + "\n", + " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 \n", + "0 74.556 7.225 1.234 \n", + "1 56.056 4.942 1.853 \n", + "2 47.000 5.969 1.174 \n", + "3 69.222 5.462 1.138 \n", + "4 66.000 5.582 1.453 \n", + "5 72.000 5.779 1.844 \n", + "6 76.722 5.664 1.215 \n", + "7 72.000 4.251 1.560 \n", + "8 66.000 5.175 1.514 \n", + "9 77.444 5.626 1.621 \n", + "10 76.389 6.047 1.126 \n", + "11 76.444 5.492 1.445 \n", + "12 78.056 7.222 1.054 \n", + "13 75.556 6.667 1.079 \n", + "14 78.556 4.472 1.280 \n", + "15 81.222 6.207 1.506 \n", + "16 74.667 5.097 1.302 \n", + "17 61.667 4.829 2.026 \n", + "18 66.500 7.024 1.050 \n", + "19 54.875 5.229 1.651 \n", + "20 58.500 5.278 1.486 \n", + "21 67.200 6.524 0.986 \n", + "22 63.722 6.941 1.040 \n", + "23 62.667 4.870 1.547 \n", + "24 78.056 5.748 1.249 \n", + "25 72.611 6.828 1.507 \n", + "26 
70.722 4.891 1.543 \n", + "27 71.667 5.953 1.108 \n", + "28 55.500 6.086 1.114 \n", + "29 91.500 5.723 1.055 \n", + ".. ... ... ... \n", + "728 68.667 6.047 1.125 \n", + "729 80.778 6.012 1.051 \n", + "730 77.667 6.042 1.137 \n", + "731 90.000 6.081 1.484 \n", + "732 85.833 5.612 1.165 \n", + "733 75.722 5.337 1.327 \n", + "734 72.533 5.525 1.147 \n", + "735 80.778 5.411 1.079 \n", + "736 75.200 5.939 1.150 \n", + "737 77.111 6.239 1.050 \n", + "738 95.611 5.369 1.276 \n", + "739 81.333 5.771 1.016 \n", + "740 79.222 6.023 1.242 \n", + "741 82.611 5.923 1.245 \n", + "742 80.389 5.809 1.300 \n", + "743 80.944 5.417 1.316 \n", + "744 74.944 5.601 1.053 \n", + "745 67.944 6.928 1.166 \n", + "746 86.500 6.284 1.156 \n", + "747 86.333 5.589 1.127 \n", + "748 86.417 5.584 1.228 \n", + "749 71.222 5.939 1.080 \n", + "750 86.462 5.712 1.298 \n", + "751 85.167 5.718 1.164 \n", + "752 81.167 6.904 0.933 \n", + "753 85.333 7.053 1.325 \n", + "754 74.667 6.729 1.132 \n", + "755 79.667 6.036 1.219 \n", + "756 76.786 5.669 1.111 \n", + "757 76.611 6.688 1.305 \n", + "\n", + "[758 rows x 11 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = pd.read_csv(\"../AMP Data Sets/Test.csv\")\n", + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(758, 11)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
05.00.0000.95174.8420.975-3.66300.28273.4445.6611.0411
14.05.4050.93171.5950.957-4.01110.60068.2226.5371.4531
25.55.4050.87373.5950.961-2.51200.59369.4444.9341.7221
35.04.1670.89566.2500.999-1.36200.61467.2224.3161.3821
47.58.5370.93264.7200.979-2.09100.61672.9444.5401.5391
\n", + "
" + ], + "text/plain": [ + " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", + "0 5.0 0.000 0.951 74.842 \n", + "1 4.0 5.405 0.931 71.595 \n", + "2 5.5 5.405 0.873 73.595 \n", + "3 5.0 4.167 0.895 66.250 \n", + "4 7.5 8.537 0.932 64.720 \n", + "\n", + " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", + "0 0.975 -3.663 0 0.282 \n", + "1 0.957 -4.011 1 0.600 \n", + "2 0.961 -2.512 0 0.593 \n", + "3 0.999 -1.362 0 0.614 \n", + "4 0.979 -2.091 0 0.616 \n", + "\n", + " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n", + "0 73.444 5.661 1.041 1 \n", + "1 68.222 6.537 1.453 1 \n", + "2 69.444 4.934 1.722 1 \n", + "3 67.222 4.316 1.382 1 \n", + "4 72.944 4.540 1.539 1 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read in the data\n", + "data = pd.read_csv(\"../AMP Data Sets/AMP_TrainSet.csv\")\n", + "data.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze data by describing\n", + "\n", + "#### This step helped me know which features are in my dataset, are they categorical or numerical.\n", + "#### How many rows and columns does the dataset have\n", + "#### The data types for the various features\n", + "#### Checked whether the dataset has null or missing values\n", + "\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
count3038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.000000
mean2.0602378.5215200.97141073.6687600.994007-2.4329270.08854515.68323373.6508285.9113611.2352550.500000
std3.8199297.5866520.1074138.5274890.0313331.7072230.28413311.5756659.1660920.6936890.2100120.500082
min-16.0000000.0000000.68400042.7500000.866000-10.4320000.0000000.04100042.7780003.5330000.7850000.000000
25%0.0000002.5160000.89500068.2940000.974000-3.6060000.0000005.58750067.5560005.4592501.0820000.000000
50%2.0000007.1430000.96300074.0595000.994000-2.2965000.00000014.98850073.6970005.9255001.1840000.500000
75%4.00000013.1580001.04100079.3437501.011000-1.2832500.00000026.80775079.7780006.3820001.3510001.000000
max30.00000046.6670001.451000101.6820001.1960003.5760001.00000051.280000103.1670008.6620002.1920001.000000
\n", + "
" + ], + "text/plain": [ + " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", + "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", + "mean 2.060237 8.521520 0.971410 73.668760 \n", + "std 3.819929 7.586652 0.107413 8.527489 \n", + "min -16.000000 0.000000 0.684000 42.750000 \n", + "25% 0.000000 2.516000 0.895000 68.294000 \n", + "50% 2.000000 7.143000 0.963000 74.059500 \n", + "75% 4.000000 13.158000 1.041000 79.343750 \n", + "max 30.000000 46.667000 1.451000 101.682000 \n", + "\n", + " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", + "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", + "mean 0.994007 -2.432927 0.088545 15.683233 \n", + "std 0.031333 1.707223 0.284133 11.575665 \n", + "min 0.866000 -10.432000 0.000000 0.041000 \n", + "25% 0.974000 -3.606000 0.000000 5.587500 \n", + "50% 0.994000 -2.296500 0.000000 14.988500 \n", + "75% 1.011000 -1.283250 0.000000 26.807750 \n", + "max 1.196000 3.576000 1.000000 51.280000 \n", + "\n", + " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n", + "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", + "mean 73.650828 5.911361 1.235255 0.500000 \n", + "std 9.166092 0.693689 0.210012 0.500082 \n", + "min 42.778000 3.533000 0.785000 0.000000 \n", + "25% 67.556000 5.459250 1.082000 0.000000 \n", + "50% 73.697000 5.925500 1.184000 0.500000 \n", + "75% 79.778000 6.382000 1.351000 1.000000 \n", + "max 103.167000 8.662000 2.192000 1.000000 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\n", + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "FULL_Charge 0\n", + "FULL_AcidicMolPerc 0\n", + "FULL_AURR980107 0\n", + "FULL_DAYM780201 0\n", + "FULL_GEOR030101 0\n", + 
"FULL_OOBM850104 0\n", + "NT_EFC195 0\n", + "AS_MeanAmphiMoment 0\n", + "AS_DAYM780201 0\n", + "AS_FUKS010112 0\n", + "CT_RACS820104 0\n", + "CLASS 0\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#number of null values in each column\n", + "data.isnull().sum()\n", + "#since my data has no null values then its good to go" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### needed to know how balanced the class values are\n", + "\n", + "
\n", + " What did you learn from all the steps above?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "data.groupby('CLASS').size().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### It's a good idea to review all the pairwise correlations of the attributes in the dataset, because some machine learning algorithms, such as linear and logistic regression, can suffer poor performance if there are highly correlated attributes in the dataset.\n", + "\n", + "
\n", + " Good explanation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data.corr(method='pearson')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### heat map to show the correlation of the data; plots that show the interactions between multiple variables in the dataset\n", + "#### Correlation gives an indication of how related the changes are between two variables. If two variables change in the same direction they are positively correlated. If they change in opposite directions together (one goes up, one goes down), then they are negatively correlated. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8,8))\n", + "sns.heatmap(data.corr(method='pearson'))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### also checked the corelation in regards to the class since am trying to build a ML agorithm for that class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "data.corr(method='pearson')['CLASS']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Most of my variables are positively skewed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " data.skew().plot(kind='bar')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## understanding data with visualization\n", + "#### Data can be visualised in many ways that is univariate plots and multivariate plots #### Used the Histogram for univariate plot as shown below and the correlation matrix plot as the multivariate plot as shown above" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Histogram\n", + "#### This helps to understand each attribute of my dataset independently.\n", + "\n" + 
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data pre-processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(18,18))\n", + "data.hist()\n", + "plt.subplots_adjust(bottom=3, right=2, top=5)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Standardize data\n", + "#### Standardization is a useful technique to transform attributes with a Gaussian distribution and differing means and standard deviations to a standard Gaussian distribution with a mean of 0 and a standard deviation of 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "array = data.values\n", + "#separate array into input and output components\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "scaler = StandardScaler().fit(X)\n", + "rescaledX = scaler.transform(X)\n", + "# summarize transformed data\n", + "#set_printoptions(precision=3)\n", + "print(rescaledX[0:5,:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "array = test.values\n", + "scaler = StandardScaler().fit(array)\n", + "rescaledt = scaler.transform(array)\n", + "# summarize transformed data\n", + "#set_printoptions(precision=3)\n", + "print(rescaledt[0:5,:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature selection\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### it's the process of selecting a subset of relevant features for use in model construction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Chose Recursive Feature Elimination\n", + "#### This is an automatic feature selection technique\n", + "#### Used logistic regression it is a good baseline as it is fast to 
train and predict and scales well.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_selection import RFE\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "array = data.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "# feature extraction\n", + "model = LogisticRegression()\n", + "rfe = RFE(model,8)\n", + "fit = rfe.fit(X,Y)\n", + "print(\"Num Features:\", fit.n_features_)\n", + "print(\"Selected Features:\", fit.support_)\n", + "print(\"Feature Ranking:\", fit.ranking_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X[:,fit.support_]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "drop=data.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\n", + "drop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "drop_test = test.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\n", + "drop_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. #### Decided to first use all the first\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate the Performance of Machine Learning Algorithms with Resampling¶\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split into Train and Test Sets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### This algorithm evaluation technique is very fast. 
It is ideal for large datasets where there is strong evidence that both splits of the data are representative of the underlying problem. Because of the speed, it is useful to use this approach when the algorithm you are investigating is slow to train.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "array = data.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "test_size = 0.30\n", + "seed = 7\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\n", + "random_state=seed)\n", + "model = LogisticRegression()\n", + "model.fit(X_train, Y_train)\n", + "result = model.score(X_test, Y_test)\n", + "print(\"Accuracy: \", (result*100.0))\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report = pd.DataFrame(output)\n", + "report.columns = ['CLASS']\n", + "report.index.name = \"Index\"\n", + "report['CLASS']=report['CLASS'].map({0.0:False, 1.0:True})\n", + "report.to_csv(\"report.csv\")\n", + "\n", + "print(report['CLASS'].unique())\n", + "print('False: ',report.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report.groupby('CLASS').size()[1].sum())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## K-fold Cross Validation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### It is more accurate because the algorithm is trained and evaluated multiple times on different data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "num_folds = 10 #number of folds to use\n", + "seed = 7 #reproducibility\n", + "\n", + "kfold = KFold(n_splits=num_folds, random_state=seed)\n", + "model = LogisticRegression()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "\n", + "print(f\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_kf = pd.DataFrame(output)\n", + "report_kf.columns = ['CLASS']\n", + "report_kf.index.name = \"Index\"\n", + "report_kf['CLASS']=report_kf['CLASS'].map({0.0:False, 1.0:True})\n", + "report_kf.to_csv(\"report_kf.csv\")\n", + "\n", + "print(report_kf['CLASS'].unique())\n", + "print('False: ',report_kf.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_kf.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Leave One Out Cross Validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import LeaveOneOut\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "num_folds = 10\n", + "loocv = LeaveOneOut()\n", + "model = LogisticRegression()\n", + "results = cross_val_score(model, X, Y, cv=loocv)\n", + "print(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_l = 
pd.DataFrame(output)\n", + "report_l.columns = ['CLASS']\n", + "report_l.index.name = \"Index\"\n", + "report_l['CLASS']=report_l['CLASS'].map({0.0:False, 1.0:True})\n", + "report_l.to_csv(\"report_l.csv\")\n", + "\n", + "print(report_l['CLASS'].unique())\n", + "print('False: ',report_l.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_l.groupby('CLASS').size()[1].sum())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Repeated Random Test-Train Splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Creates a random split of the data like the train/test split , but repeats the process of splitting and evaluation of the algorithm multiple times, like cross validation. Repeated random splits can be useful intermediates when trying to balance variance in the estimated performance, model training speed and dataset size\n", + "#### In this I prefered using Repeated Random Test_Train Splits because when you look at the dataset the zeros are one side and the ones on the otherside in the 'class' column. 
So I would prefer to first shuffle the data and then split it to reduce the bias" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import ShuffleSplit\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "n_splits = 10\n", + "test_size = 0.30\n", + "seed = 7\n", + "kfold = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)\n", + "model = LogisticRegression()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(\"Accuracy: \" , (results.mean()*100.0, results.std()*100.0))\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_rrt = pd.DataFrame(output)\n", + "report_rrt.columns = ['CLASS']\n", + "report_rrt.index.name = \"Index\"\n", + "report_rrt['CLASS']=report_rrt['CLASS'].map({0.0:False, 1.0:True})\n", + "report_rrt.to_csv(\"report_rrt.csv\")\n", + "\n", + "print(report_rrt['CLASS'].unique())\n", + "print('False: ',report_rrt.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_rrt.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine Learning Algorithm Performance Metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithms Overview\n", + "### Linear machine learning algorithms:\n", + "\n", + " Logistic Regression.\n", + " Linear Discriminant Analysis.\n", + "### Nonlinear machine learning algorithms:\n", + "\n", + " k-Nearest Neighbors.\n", + " Naive Bayes.\n", + " Classification and Regression Trees.\n", + " Support Vector Machines.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear Machine Learning Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, +
"source": [ + "### Logistic Regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Logistic regression is best suited for binary classification: data sets where y = 0 or 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using standardized data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Logistic regression on standardized data\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = LogisticRegression()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(rescaledX,Y)\n", + "output = model.predict(rescaledt)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_scaled = pd.DataFrame(output)\n", + "report_scaled.columns = ['CLASS']\n", + "report_scaled.index.name = \"Index\"\n", + "report_scaled['CLASS']=report_scaled['CLASS'].map({0.0:False, 1.0:True})\n", + "report_scaled.to_csv(\"report_scaled.csv\")\n", + "\n", + "print(report_scaled['CLASS'].unique())\n", + "print('False: ',report_scaled.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_scaled.groupby('CLASS').size()[1].sum())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Logistic Regression Classification\n", + "\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = LogisticRegression()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "my_report = pd.DataFrame(output)\n", + "my_report.columns = 
['CLASS']\n", + "my_report.index.name = \"Index\"\n", + "my_report['CLASS']=my_report['CLASS'].map({0.0:False, 1.0:True})\n", + "my_report.to_csv(\"report_XGB.csv\")\n", + "\n", + "print(my_report['CLASS'].unique())\n", + "print('False: ',my_report.groupby('CLASS').size()[0].sum())\n", + "print('True: ',my_report.groupby('CLASS').size()[1].sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear Discriminant Analysis¶\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "\n", + "num_folds = 10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = LinearDiscriminantAnalysis()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "lda_report = pd.DataFrame(output)\n", + "lda_report.columns = ['CLASS']\n", + "lda_report.index.name = \"Index\"\n", + "lda_report['CLASS']=lda_report['CLASS'].map({0.0:False, 1.0:True})\n", + "lda_report.to_csv(\"ldareport.csv\")\n", + "\n", + "print(lda_report['CLASS'].unique())\n", + "print('False: ',lda_report.groupby('CLASS').size()[0].sum())\n", + "print('True: ',lda_report.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nonlinear Machine Learning Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### k-Nearest Neighbors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "num_folds = 
10\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = KNeighborsClassifier()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_k = pd.DataFrame(output)\n", + "report_k.columns = ['CLASS']\n", + "report_k.index.name = \"Index\"\n", + "report_k['CLASS']=report_k['CLASS'].map({0.0:False, 1.0:True})\n", + "report_k.to_csv(\"report_k.csv\")\n", + "\n", + "\n", + "print(report_k['CLASS'].unique())\n", + "print('False: ',report_k.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_k.groupby('CLASS').size()[1].sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Naive Bayes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tried using Standardised data on Naive Bayes\n", + "\n", + "### When I predicted Naive Bayes on Standardised data gave me a score of 0.98235, after feature selection it gave 0.90 and on unstandardised data it gave a score of 0.9959" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Naive Bayes on standardised data\n", + "from sklearn.naive_bayes import GaussianNB\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = GaussianNB()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "model.fit(rescaledX,Y)\n", + "output = model.predict(rescaledt)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_rebayes = pd.DataFrame(output)\n", + "report_rebayes.columns = ['CLASS']\n", + "report_rebayes.index.name = \"Index\"\n", + 
"report_rebayes['CLASS']=report_rebayes['CLASS'].map({0.0:False, 1.0:True})\n", + "report_rebayes.to_csv(\"report_rebayes.csv\")\n", + "\n", + "\n", + "print(report_rebayes['CLASS'].unique())\n", + "print('False: ',report_rebayes.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_rebayes.groupby('CLASS').size()[1].sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Naive Bayes on selected features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Naive Bayes on selected features\n", + "\n", + "array = data.values\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "selectedX = X[:,fit.support_]\n", + "\n", + "array2 =test.values\n", + "selectedT = array2[:,fit.support_]\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = GaussianNB()\n", + "results = cross_val_score(model, selectedX, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "model.fit(selectedX,Y)\n", + "output = model.predict(selectedT)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(selectedX),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_sel = pd.DataFrame(output)\n", + "report_sel.columns = ['CLASS']\n", + "report_sel.index.name = \"Index\"\n", + "report_sel['CLASS']=report_sel['CLASS'].map({0.0:False, 1.0:True})\n", + "report_sel.to_csv(\"report_sel.csv\")\n", + "\n", + "\n", + "print(report_sel['CLASS'].unique())\n", + "print('False: ',report_sel.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_sel.groupby('CLASS').size()[1].sum())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = GaussianNB()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", 
+ "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_bayes = pd.DataFrame(output)\n", + "report_bayes.columns = ['CLASS']\n", + "report_bayes.index.name = \"Index\"\n", + "report_bayes['CLASS']=report_bayes['CLASS'].map({0.0:False, 1.0:True})\n", + "report_bayes.to_csv(\"report_bayes.csv\")\n", + "\n", + "\n", + "print(report_bayes['CLASS'].unique())\n", + "print('False: ',report_bayes.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_bayes.groupby('CLASS').size()[1].sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Classification and Regression Trees" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Used for classification or regression predictive modeling problems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = DecisionTreeClassifier()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_tree = pd.DataFrame(output)\n", + "report_tree.columns = ['CLASS']\n", + "report_tree.index.name = \"Index\"\n", + "report_tree['CLASS']=report_tree['CLASS'].map({0.0:False, 1.0:True})\n", + "report_tree.to_csv(\"report_tree.csv\")\n", + "\n", + "\n", + "print(report_tree['CLASS'].unique())\n", + "print('False: ',report_tree.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_tree.groupby('CLASS').size()[1].sum())" + ] + }, + { + "cell_type": "markdown", + "metadata":
{}, + "source": [ + "### Support Vector Machines " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### A support vector machine (SVM) is a supervised machine learning model that uses classification algorithms for two-group classification problems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = SVC()\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_svm = pd.DataFrame(output)\n", + "report_svm.columns = ['CLASS']\n", + "report_svm.index.name = \"Index\"\n", + "report_svm['CLASS']=report_svm['CLASS'].map({0.0:False, 1.0:True})\n", + "report_svm.to_csv(\"report_svm.csv\")\n", + "\n", + "\n", + "print(report_svm['CLASS'].unique())\n", + "print('False: ',report_svm.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_svm.groupby('CLASS').size()[1].sum())\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Combine Models Into Ensemble Predictions\n", + "\n", + "The three most popular methods for combining the predictions from different models are:\n", + " \n", + " Bagging\n", + " Boosting\n", + " Voting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Boosting Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### These seek to improve the prediction power by training a sequence of weak models, each compensating for the weaknesses of its predecessors.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## AdaBoost" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, +
"source": [ + "#### This is specifically designed for classification problems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# AdaBoost Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 39\n", + "seed=10\n", + "\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "\n", + "model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_ada = pd.DataFrame(output)\n", + "report_ada.columns = ['CLASS']\n", + "report_ada.index.name = \"Index\"\n", + "report_ada['CLASS']=report_ada['CLASS'].map({0.0:False, 1.0:True})\n", + "report_ada.to_csv(\"report_ada.csv\")\n", + "\n", + "\n", + "print(report_ada['CLASS'].unique())\n", + "print('False: ',report_ada.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_ada.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bagging Algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Bagging is used with decision trees where it significantly raises the stability of models in the reduction of variance and improving accuracy, which eliminates the challenge of overfitting." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bagged Decision Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bagged Decision Trees for Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import BaggingClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "#split the data in portions\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "seed = 7 #duplication\n", + "\n", + "#split according to cross validation\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "\n", + "#initialize the model\n", + "cart = DecisionTreeClassifier()\n", + "\n", + "#bagging\n", + "num_trees = 250\n", + "\n", + "#model\n", + "model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)\n", + "\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_bag = pd.DataFrame(output)\n", + "report_bag.columns = ['CLASS']\n", + "report_bag.index.name = \"Index\"\n", + "report_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\n", + "report_bag.to_csv(\"report_bag.csv\")\n", + "\n", + "\n", + "print(report_bag['CLASS'].unique())\n", + "print('False: ',report_bag.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_bag.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Random Forest Classification\n", + "from pandas import 
read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 1000\n", + "\n", + "max_features = 3\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_rf = pd.DataFrame(output)\n", + "report_rf.columns = ['CLASS']\n", + "report_rf.index.name = \"Index\"\n", + "report_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\n", + "report_rf.to_csv(\"report_rf.csv\")\n", + "\n", + "\n", + "print(report_rf['CLASS'].unique())\n", + "print('False: ',report_rf.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_rf.groupby('CLASS').size()[1].sum())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import ExtraTreesClassifier\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 100\n", + "max_features = 7\n", + "\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "\n", + "model = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features)\n", + "\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "\n", + "print(results.mean())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Voting Ensemble" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Voting Ensemble for Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from xgboost import XGBClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import VotingClassifier\n", + "\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, random_state=7)\n", + "\n", + "# create the sub models\n", + "estimators = []\n", + "model1 = LogisticRegression()\n", + "estimators.append(('logistic', model1))\n", + "\n", + "model2 = DecisionTreeClassifier()\n", + "estimators.append(('cart', model2))\n", + "\n", + "model3 = SVC()\n", + "estimators.append(('svm', model3))\n", + "\n", + "model4 = XGBClassifier()\n", + "estimators.append(('xgb', model4))\n", + "\n", + "model5 = RandomForestClassifier()\n", + "estimators.append(('rfc', model5))\n", + "\n", + "# create the ensemble model\n", + "ensemble = VotingClassifier(estimators)\n", + "results = cross_val_score(ensemble, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_v = pd.DataFrame(output)\n", + "report_v.columns = ['CLASS']\n", + "report_v.index.name = \"Index\"\n", + "report_v['CLASS']=report_v['CLASS'].map({0.0:False, 1.0:True})\n", + "report_v.to_csv(\"report_v.csv\")\n", + "\n", + "\n", + "print(report_v['CLASS'].unique())\n", + "print('False: ',report_v.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_v.groupby('CLASS').size()[1].sum())\n", + "\n", + 
"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## comparing the algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# prepare models and add them to a list\n", + "from matplotlib import pyplot\n", + "\n", + "models = []\n", + "models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\n", + "models.append(('LDA', LinearDiscriminantAnalysis()))\n", + "models.append(('KNN', KNeighborsClassifier()))\n", + "models.append(('CART', DecisionTreeClassifier()))\n", + "models.append(('NB', GaussianNB()))\n", + "models.append(('SVM', SVC(gamma='auto')))\n", + "models.append(('ETC', ExtraTreesClassifier()))\n", + "models.append(('RFC', RandomForestClassifier()))\n", + "\n", + "# evaluate each model in turn\n", + "results = []\n", + "names = []\n", + "scoring = 'accuracy'\n", + "\n", + "for name, model in models:\n", + " kfold = KFold(n_splits=10, random_state=7)\n", + " cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + " results.append(cv_results)\n", + " names.append(name)\n", + " msg = (name, cv_results.mean(), cv_results.std())\n", + " print(msg)\n", + "\n", + "# boxplot algorithm comparison\n", + "fig = pyplot.figure()\n", + "fig.suptitle('Algorithm Comparison')\n", + "ax = fig.add_subplot(111)\n", + "pyplot.boxplot(results)\n", + "ax.set_xticklabels(names)\n", + "pyplot.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb index e6b0219..36296d9 100644 --- a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb +++ b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb @@ -1 +1,3015 @@ -{"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n \n\n# Any results you write to the current directory are saved as output.","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# References\n#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2"},{"metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","trusted":true},"cell_type":"code","source":"import warnings\nwarnings.filterwarnings('ignore')","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\ntest","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#read in the data\ndata = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\ndata.head(5)","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Analyze data by describing\n\n#### This step helped me know which features are in my dataset, are they categorical or numerical.\n#### How many rows and columns does the dataset have\n#### The data types for the various features\n#### Checked 
whether the dataset has null or missing values"},{"metadata":{"trusted":true},"cell_type":"code","source":"#Check the dimensions to the number of rows and columns\ndata.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.columns","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.dtypes","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\ndata.describe()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#number of null values in each column\ndata.isnull().sum()\n#since my data has no null values then its good to go","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"#### needed to know how balanced the class values are"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.groupby('CLASS').size().plot(kind='bar')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### Its a good idea to review all the pairwise correlations of the attributes in the dataset because some machine learning algorithm like linear and logistic regression can suffer poor performance if there are highly correlated attributes in the dataset"},{"metadata":{"trusted":true},"cell_type":"code","source":"data.corr(method='pearson')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### heat map to show the correlation of the data; plots that show the interactions between multiple variables in the dataset\n#### Correlation gives an indication of how related the changes are between two variables. If two variables change in the same direction they are positively correlated. 
If they change in opposite directions together (one goes up, one goes down), then they are negatively correlated. "},{"metadata":{"trusted":true},"cell_type":"code","source":"plt.figure(figsize=(8,8))\nsns.heatmap(data.corr(method='pearson'))\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### also checked the corelation in regards to the class since am trying to build a ML agorithm for that class"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.corr(method='pearson')['CLASS']","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"#### Most of my variables are positively skewed"},{"metadata":{"trusted":true},"cell_type":"code","source":" data.skew().plot(kind='bar')","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## understanding data with visualization\n#### Data can be visualised in many ways that is univariate plots and multivariate plots #### Used the Histogram for univariate plot as shown below and the correlation matrix plot as the multivariate plot as shown above"},{"metadata":{},"cell_type":"markdown","source":"## Histogram\n#### This helps to understand each attribute of my dataset independently"},{"metadata":{},"cell_type":"markdown","source":"## Data pre-processing"},{"metadata":{"trusted":true},"cell_type":"code","source":"plt.figure(figsize=(18,18))\ndata.hist()\nplt.subplots_adjust(bottom=3, right=2, top=5)\nplt.show()","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Standardize data\n#### Standardization is a useful technique to transform attributes with a Gaussian distribution and differing means and standard deviations to a standard Gaussian distribution with a mean of 0 and a standard deviation of 1"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.preprocessing import StandardScaler\narray = data.values\n#separate array into input and output components\nX = 
array[:,0:11]\nY = array[:,11]\nscaler = StandardScaler().fit(X)\nrescaledX = scaler.transform(X)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledX[0:5,:])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"array = test.values\nscaler = StandardScaler().fit(array)\nrescaledt = scaler.transform(array)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledt[0:5,:])","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Feature selection\n\n"},{"metadata":{},"cell_type":"markdown","source":"#### it's the process of selecting a subset of relevant features for use in model construction"},{"metadata":{},"cell_type":"markdown","source":"### Chose Recursive Feature Elimination\n#### This is an automatic feature selection technique\n#### Used logistic regression it is a good baseline as it is fast to train and predict and scales well.\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.feature_selection import RFE\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n# feature extraction\nmodel = LogisticRegression()\nrfe = RFE(model,8)\nfit = rfe.fit(X,Y)\nprint(\"Num Features:\", fit.n_features_)\nprint(\"Selected Features:\", fit.support_)\nprint(\"Feature Ranking:\", fit.ranking_)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"X[:,fit.support_]","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop=data.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop_test = test.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop_test","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"1. 
#### Decided to first use all the first\n"},{"metadata":{},"cell_type":"markdown","source":"# Evaluate the Performance of Machine Learning Algorithms with Resampling¶\n"},{"metadata":{},"cell_type":"markdown","source":"#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers."},{"metadata":{},"cell_type":"markdown","source":"## Split into Train and Test Sets"},{"metadata":{},"cell_type":"markdown","source":"#### This algorithm evaluation technique is very fast. It is ideal for large datasets where there is strong evidence that both splits of the data are representative of the underlying problem. Because of the speed, it is useful to use this approach when the algorithm you are investigating is slow to train.\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\ntest_size = 0.30\nseed = 7\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\nrandom_state=seed)\nmodel = LogisticRegression()\nmodel.fit(X_train, Y_train)\nresult = model.score(X_test, Y_test)\nprint(\"Accuracy: \", (result*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport = pd.DataFrame(output)\nreport.columns = ['CLASS']\nreport.index.name = \"Index\"\nreport['CLASS']=report['CLASS'].map({0.0:False, 1.0:True})\nreport.to_csv(\"report.csv\")\n\nprint(report['CLASS'].unique())\nprint('False: ',report.groupby('CLASS').size()[0].sum())\nprint('True: ',report.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## K-fold Cross Validation"},{"metadata":{},"cell_type":"markdown","source":"#### It is more accurate because 
the algorithm is trained and evaluated multiple times on different data."},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10 #number of folds to use\nseed = 7 #reproducibility\n\nkfold = KFold(n_splits=num_folds, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(f\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_kf = pd.DataFrame(output)\nreport_kf.columns = ['CLASS']\nreport_kf.index.name = \"Index\"\nreport_kf['CLASS']=report_kf['CLASS'].map({0.0:False, 1.0:True})\nreport_kf.to_csv(\"report_kf.csv\")\n\nprint(report_kf['CLASS'].unique())\nprint('False: ',report_kf.groupby('CLASS').size()[0].sum())\nprint('True: ',report_kf.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Leave One Out Cross Validation"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import LeaveOneOut\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10\nloocv = LeaveOneOut()\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=loocv)\nprint(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_l = pd.DataFrame(output)\nreport_l.columns = ['CLASS']\nreport_l.index.name = \"Index\"\nreport_l['CLASS']=report_l['CLASS'].map({0.0:False, 1.0:True})\nreport_l.to_csv(\"report_l.csv\")\n\nprint(report_l['CLASS'].unique())\nprint('False: ',report_l.groupby('CLASS').size()[0].sum())\nprint('True: 
',report_l.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Repeated Random Test-Train Splits"},{"metadata":{},"cell_type":"markdown","source":"#### Creates a random split of the data like the train/test split , but repeats the process of splitting and evaluation of the algorithm multiple times, like cross validation. Repeated random splits can be useful intermediates when trying to balance variance in the estimated performance, model training speed and dataset size\n#### In this I prefered using Repeated Random Test_Train Splits because when you look at the dataset the zeros are one side and the ones on the otherside in the 'class' column. So I would prefer to first shuffle the data and then split it to reduce on the bias"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import cross_val_score\n\nn_splits = 10\ntest_size = 0.30\nseed = 7\nkfold = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(\"Accuracy: \" , (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rrt = pd.DataFrame(output)\nreport_rrt.columns = ['CLASS']\nreport_rrt.index.name = \"Index\"\nreport_rrt['CLASS']=report_rrt['CLASS'].map({0.0:False, 1.0:True})\nreport_rrt.to_csv(\"report_rrt.csv\")\n\nprint(report_rrt['CLASS'].unique())\nprint('False: ',report_rrt.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rrt.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# Machine Learning Algorithm Performance Metrics"},{"metadata":{},"cell_type":"markdown","source":"## Algorithms 
Overview\n### linear machine learning algorithms:\n\n Logistic Regression.\n Linear Discriminant Analysis.\n### onlinear machine learning algorithms\n\n k-Nearest Neighbors.\n Naive Bayes.\n Classication and Regression Trees.\n Support Vector Machines.\n"},{"metadata":{},"cell_type":"markdown","source":"## Linear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"### Logistic Regression"},{"metadata":{},"cell_type":"markdown","source":"#### Logistic regression is best suited for binary classification: data sets where y = 0 or 1"},{"metadata":{},"cell_type":"markdown","source":"### Using standardized data"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic regression on standardized data\nnum_folds = 10\nkfold = KFold(n_splits=10, random_state=7)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(rescaledX,Y)\noutput = model.predict(rescaledt)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_scaled = pd.DataFrame(output)\nreport_scaled.columns = ['CLASS']\nreport_scaled.index.name = \"Index\"\nreport_scaled['CLASS']=report_scaled['CLASS'].map({0.0:False, 1.0:True})\nreport_scaled.to_csv(\"report_scaled.csv\")\n\nprint(report_scaled['CLASS'].unique())\nprint('False: ',report_scaled.groupby('CLASS').size()[0].sum())\nprint('True: ',report_scaled.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic Regression Classification\n\nnum_folds = 10\nkfold = KFold(n_splits=10, random_state=7)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nmy_report = 
pd.DataFrame(output)\nmy_report.columns = ['CLASS']\nmy_report.index.name = \"Index\"\nmy_report['CLASS']=my_report['CLASS'].map({0.0:False, 1.0:True})\nmy_report.to_csv(\"report_XGB.csv\")\n\nprint(my_report['CLASS'].unique())\nprint('False: ',my_report.groupby('CLASS').size()[0].sum())\nprint('True: ',my_report.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Linear Discriminant Analysis¶\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n\nnum_folds = 10\nkfold = KFold(n_splits=10, random_state=7)\nmodel = LinearDiscriminantAnalysis()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nlda_report = pd.DataFrame(output)\nlda_report.columns = ['CLASS']\nlda_report.index.name = \"Index\"\nlda_report['CLASS']=lda_report['CLASS'].map({0.0:False, 1.0:True})\nlda_report.to_csv(\"ldareport.csv\")\n\nprint(lda_report['CLASS'].unique())\nprint('False: ',lda_report.groupby('CLASS').size()[0].sum())\nprint('True: ',lda_report.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Nonlinear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"### k-Nearest Neighbors"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.neighbors import KNeighborsClassifier\nnum_folds = 10\nkfold = KFold(n_splits=10, random_state=7)\nmodel = KNeighborsClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = 
matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_k = pd.DataFrame(output)\nreport_k.columns = ['CLASS']\nreport_k.index.name = \"Index\"\nreport_k['CLASS']=report_k['CLASS'].map({0.0:False, 1.0:True})\nreport_k.to_csv(\"report_k.csv\")\n\n\nprint(report_k['CLASS'].unique())\nprint('False: ',report_k.groupby('CLASS').size()[0].sum())\nprint('True: ',report_k.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Naive Bayes"},{"metadata":{},"cell_type":"markdown","source":"### Tried using Standardised data on Naive Bayes\n\n### When I predicted Naive Bayes on Standardised data gave me a score of 0.98235, after feature selection it gave 0.90 and on unstandardised data it gave a score of 0.9959"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on standardised data\nfrom sklearn.naive_bayes import GaussianNB\n\nkfold = KFold(n_splits=10, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(rescaledX,Y)\noutput = model.predict(rescaledt)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rebayes = pd.DataFrame(output)\nreport_rebayes.columns = ['CLASS']\nreport_rebayes.index.name = \"Index\"\nreport_rebayes['CLASS']=report_rebayes['CLASS'].map({0.0:False, 1.0:True})\nreport_rebayes.to_csv(\"report_rebayes.csv\")\n\n\nprint(report_rebayes['CLASS'].unique())\nprint('False: ',report_rebayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rebayes.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Naive Bayes on selected features"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on selected features\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n\nselectedX = X[:,fit.support_]\n\narray2 
=test.values\nselectedT = array2[:,fit.support_]\n\nkfold = KFold(n_splits=10, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, selectedX, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(selectedX,Y)\noutput = model.predict(selectedT)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(selectedX),Y)\nprint('MCC:',mcc)\n \nreport_sel = pd.DataFrame(output)\nreport_sel.columns = ['CLASS']\nreport_sel.index.name = \"Index\"\nreport_sel['CLASS']=report_sel['CLASS'].map({0.0:False, 1.0:True})\nreport_sel.to_csv(\"report_sel.csv\")\n\n\nprint(report_sel['CLASS'].unique())\nprint('False: ',report_sel.groupby('CLASS').size()[0].sum())\nprint('True: ',report_sel.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.naive_bayes import GaussianNB\n\nkfold = KFold(n_splits=10, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bayes = pd.DataFrame(output)\nreport_bayes.columns = ['CLASS']\nreport_bayes.index.name = \"Index\"\nreport_bayes['CLASS']=report_bayes['CLASS'].map({0.0:False, 1.0:True})\nreport_bayes.to_csv(\"report_bayes.csv\")\n\n\nprint(report_bayes['CLASS'].unique())\nprint('False: ',report_bayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bayes.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Classiffication and Regression Trees"},{"metadata":{},"cell_type":"markdown","source":"#### used for classification or regression predictive modeling problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.tree import DecisionTreeClassifier\nkfold = KFold(n_splits=10, 
random_state=7)\nmodel = DecisionTreeClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_tree = pd.DataFrame(output)\nreport_tree.columns = ['CLASS']\nreport_tree.index.name = \"Index\"\nreport_tree['CLASS']=report_tree['CLASS'].map({0.0:False, 1.0:True})\nreport_tree.to_csv(\"report_tree.csv\")\n\n\nprint(report_tree['CLASS'].unique())\nprint('False: ',report_tree.groupby('CLASS').size()[0].sum())\nprint('True: ',report_tree.groupby('CLASS').size()[1].sum())","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Support Vector Machines "},{"metadata":{},"cell_type":"markdown","source":"#### A support vector machine (SVM) is a supervised machine learning model that uses classification algorithms for two-group classification problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.svm import SVC\n\nkfold = KFold(n_splits=10, random_state=7)\nmodel = SVC()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_svm = pd.DataFrame(output)\nreport_svm.columns = ['CLASS']\nreport_svm.index.name = \"Index\"\nreport_svm['CLASS']=report_svm['CLASS'].map({0.0:False, 1.0:True})\nreport_svm.to_csv(\"report_svm.csv\")\n\n\nprint(report_svm['CLASS'].unique())\nprint('False: ',report_svm.groupby('CLASS').size()[0].sum())\nprint('True: ',report_svm.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# Combine Models Into Ensemble Predictions\n\nThe three most popular methods for combining the predictions from different models are:\n \n Bagging\n 
Boosting\n Voting"},{"metadata":{},"cell_type":"markdown","source":"> # Boosting Algorithms"},{"metadata":{},"cell_type":"markdown","source":"#### These seek to improve predictive power by training a sequence of weak models, each compensating for the weaknesses of its predecessors.\n"},{"metadata":{},"cell_type":"markdown","source":"## AdaBoost"},{"metadata":{},"cell_type":"markdown","source":"#### This is specifically designed for classification problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"# AdaBoost Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 39\nseed=10\n\nkfold = KFold(n_splits=10, random_state=seed)\n\nmodel = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_ada = pd.DataFrame(output)\nreport_ada.columns = ['CLASS']\nreport_ada.index.name = \"Index\"\nreport_ada['CLASS']=report_ada['CLASS'].map({0.0:False, 1.0:True})\nreport_ada.to_csv(\"report_ada.csv\")\n\n\nprint(report_ada['CLASS'].unique())\nprint('False: ',report_ada.groupby('CLASS').size()[0].sum())\nprint('True: ',report_ada.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Bagging Algorithms"},{"metadata":{},"cell_type":"markdown","source":"#### Bagging is commonly used with decision trees: it makes the model more stable by reducing variance and improving accuracy, which helps to reduce overfitting."},{"metadata":{},"cell_type":"markdown","source":"## Bagged Decision 
Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Bagged Decision Trees for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import BaggingClassifier\nfrom sklearn.tree import DecisionTreeClassifier\n\n#split the data in portions\nX = array[:,0:11]\nY = array[:,11]\nseed = 7 #duplication\n\n#split according to cross validation\nkfold = KFold(n_splits=10, random_state=seed)\n\n#initialize the model\ncart = DecisionTreeClassifier()\n\n#bagging\nnum_trees = 250\n\n#model\nmodel = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)\n\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bag = pd.DataFrame(output)\nreport_bag.columns = ['CLASS']\nreport_bag.index.name = \"Index\"\nreport_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\nreport_bag.to_csv(\"report_bag.csv\")\n\n\nprint(report_bag['CLASS'].unique())\nprint('False: ',report_bag.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bag.groupby('CLASS').size()[1].sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Random Forest"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Random Forest Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 1000\n\nmax_features = 3\n\nkfold = KFold(n_splits=10, random_state=7)\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput 
= model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rf = pd.DataFrame(output)\nreport_rf.columns = ['CLASS']\nreport_rf.index.name = \"Index\"\nreport_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\nreport_rf.to_csv(\"report_rf.csv\")\n\n\nprint(report_rf['CLASS'].unique())\nprint('False: ',report_rf.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rf.groupby('CLASS').size()[1].sum())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Extra Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.ensemble import ExtraTreesClassifier\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 100\nmax_features = 7\n\nkfold = KFold(n_splits=10, random_state=7)\n\nmodel = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features)\n\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(results.mean())\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Voting Ensemble"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Voting Ensemble for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.tree import DecisionTreeClassifier\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import VotingClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\nkfold = KFold(n_splits=10, random_state=7)\n\n# create the sub models\nestimators = []\nmodel1 = LogisticRegression()\nestimators.append(('logistic', model1))\n\nmodel2 = DecisionTreeClassifier()\nestimators.append(('cart', model2))\n\nmodel3 = SVC()\nestimators.append(('svm', model3))\n\nmodel4 = 
XGBClassifier()\nestimators.append(('xgb', model4))\n\nmodel5 = RandomForestClassifier()\nestimators.append(('rfc', model5))\n\n# create the ensemble model\nensemble = VotingClassifier(estimators)\nresults = cross_val_score(ensemble, X, Y, cv=kfold)\nprint(results.mean())\n\n\n# Fit and predict with the voting ensemble itself; 'model' still refers to the\n# Extra Trees estimator from the previous cell, so using it here would report\n# that model's predictions under the voting-ensemble name.\nensemble.fit(X,Y)\noutput = ensemble.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(ensemble.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_v = pd.DataFrame(output)\nreport_v.columns = ['CLASS']\nreport_v.index.name = \"Index\"\nreport_v['CLASS']=report_v['CLASS'].map({0.0:False, 1.0:True})\nreport_v.to_csv(\"report_v.csv\")\n\n\nprint(report_v['CLASS'].unique())\nprint('False: ',report_v.groupby('CLASS').size()[0].sum())\nprint('True: ',report_v.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## comparing the algorithms"},{"metadata":{"trusted":true},"cell_type":"code","source":"\n# prepare models and add them to a list\nfrom matplotlib import pyplot\n\nmodels = []\nmodels.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\nmodels.append(('LDA', LinearDiscriminantAnalysis()))\nmodels.append(('KNN', KNeighborsClassifier()))\nmodels.append(('CART', DecisionTreeClassifier()))\nmodels.append(('NB', GaussianNB()))\nmodels.append(('SVM', SVC(gamma='auto')))\nmodels.append(('ETC', ExtraTreesClassifier()))\nmodels.append(('RFC', RandomForestClassifier()))\n\n# evaluate each model in turn\nresults = []\nnames = []\nscoring = 'accuracy'\n\nfor name, model in models:\n kfold = KFold(n_splits=10, random_state=7)\n cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n results.append(cv_results)\n names.append(name)\n msg = (name, cv_results.mean(), cv_results.std())\n print(msg)\n\n# boxplot algorithm comparison\nfig = pyplot.figure()\nfig.suptitle('Algorithm Comparison')\nax = 
fig.add_subplot(111)\npyplot.boxplot(results)\nax.set_xticklabels(names)\npyplot.show()","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''"},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":""}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" + }, + "outputs": [], + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n", + "# For example, here's several helpful packages to load in \n", + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Input data files are available in the \"../input/\" directory.\n", + "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", + "\n", + "import os\n", + "#for dirname, _, filenames in os.walk('/kaggle/input'):\n", + "# for filename in filenames:\n", + "# print(os.path.join(dirname, filename))\n", + " \n", + "\n", + "# Any results you write to the current directory are saved as output." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References\n", + "#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n", + "#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n", + "#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n", + "#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2\n", + "\n", + "\n", + "
\n", + " Please put these at the bottom." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", + "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104
04.03.7040.87373.5190.987-4.83300.38274.5567.2251.234
14.04.4440.89262.4440.931-0.58400.32056.0564.9421.853
22.00.0000.90147.0001.039-5.66400.16447.0005.9691.174
34.50.0000.86969.2220.982-5.42302.01069.2225.4621.138
4-4.021.5911.06171.6820.976-2.00202.75866.0005.5821.453
54.56.9770.89568.5120.950-1.87803.09072.0005.7791.844
612.03.1751.02274.4601.010-3.22503.17276.7225.6641.215
71.53.7040.93269.5190.977-2.50902.54372.0004.2511.560
83.03.3330.90359.5000.963-1.68202.99066.0005.1751.514
94.00.0000.87372.7920.998-4.94302.98577.4445.6261.621
1011.08.2190.92775.0680.989-3.11803.49376.3896.0471.126
114.52.7030.96669.7570.972-3.89613.71476.4445.4921.445
120.011.5381.02777.9230.981-3.95413.67978.0567.2221.054
136.03.3331.11479.1001.024-2.43703.98875.5566.6671.079
140.00.0001.00578.9711.1021.54404.14378.5564.4721.280
153.06.0610.89776.4550.955-4.03214.31081.2226.2071.506
163.02.1280.88964.0641.0000.58304.09774.6675.0971.302
179.50.0000.78655.6470.955-0.57703.81661.6674.8292.026
181.512.0000.94870.7200.956-3.55913.98266.5007.0241.050
194.00.0000.82854.8751.048-2.85304.29354.8755.2291.651
205.03.1250.90169.5940.995-1.67703.87658.5005.2781.486
211.50.0000.87267.2000.972-5.39204.47167.2006.5240.986
222.00.0000.78664.1500.969-4.70603.92963.7226.9411.040
238.00.0000.93362.8001.008-4.17004.10462.6674.8701.547
243.010.8111.08677.1081.010-2.11204.20878.0565.7481.249
254.02.9410.90071.2060.967-3.96304.12672.6116.8281.507
2610.00.0000.84569.5110.975-2.04903.97770.7224.8911.543
275.50.0000.94671.6671.023-4.98203.43971.6675.9531.108
289.00.0000.82254.5450.993-3.95503.56255.5006.0861.114
29-6.038.2351.23983.5591.0020.10813.31791.5005.7231.055
....................................
7281.04.1670.93167.8330.936-0.775014.99368.6676.0471.125
7291.07.4070.93081.2590.985-2.859014.99380.7786.0121.051
7300.025.0001.11979.6671.023-0.379014.98677.6676.0421.137
731-2.017.5001.06884.1000.995-1.328014.94390.0006.0811.484
7321.04.5450.83085.0910.996-3.729014.98285.8335.6121.165
7331.011.5381.05070.8851.015-2.530014.98175.7225.3371.327
7340.013.3330.92572.5330.993-2.093018.27972.5335.5251.147
7350.511.1111.13580.7781.034-1.448014.95180.7785.4111.079
736-0.513.3330.95775.2000.969-1.257017.79975.2005.9391.150
7375.010.2040.94778.1840.986-3.313015.30077.1116.2391.050
738-1.019.2311.06591.0771.0040.520015.21495.6115.3691.276
739-1.517.6471.11982.1471.005-1.477015.32181.3335.7711.016
7406.53.7040.90976.1111.009-2.840015.66679.2226.0231.242
7415.00.0000.83680.0970.973-2.795015.11082.6115.9231.245
742-3.513.3331.11575.2891.027-1.292015.05880.3895.8091.300
7434.08.3330.92879.0000.960-3.607015.09180.9445.4171.316
74410.517.0001.09182.1601.003-2.414015.27074.9445.6011.053
7455.512.8210.99667.5130.991-4.674014.98967.9446.9281.166
746-2.019.0481.03085.3330.969-1.162015.45886.5006.2841.156
747-2.520.5131.09982.7951.014-0.965015.71586.3335.5891.127
748-2.025.0001.17886.4171.0131.322022.98986.4175.5841.228
749-4.021.8751.02166.1880.994-0.523015.49971.2225.9391.080
7502.00.0001.05486.4621.051-0.815020.86686.4625.7121.298
751-2.014.8941.04283.2550.978-1.303015.26085.1675.7181.164
752-8.028.2611.22182.1091.014-0.153015.35581.1676.9040.933
753-1.516.0001.10082.8200.991-1.987015.18585.3337.0531.325
754-1.018.1821.08573.4551.027-0.745016.55074.6676.7291.132
755-1.019.0481.10882.1901.033-1.789016.11279.6676.0361.219
756-1.07.1430.95576.7861.0231.141020.63076.7865.6691.111
757-7.017.1431.07884.1861.009-0.066017.16876.6116.6881.305
\n", + "

758 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", + "0 4.0 3.704 0.873 73.519 \n", + "1 4.0 4.444 0.892 62.444 \n", + "2 2.0 0.000 0.901 47.000 \n", + "3 4.5 0.000 0.869 69.222 \n", + "4 -4.0 21.591 1.061 71.682 \n", + "5 4.5 6.977 0.895 68.512 \n", + "6 12.0 3.175 1.022 74.460 \n", + "7 1.5 3.704 0.932 69.519 \n", + "8 3.0 3.333 0.903 59.500 \n", + "9 4.0 0.000 0.873 72.792 \n", + "10 11.0 8.219 0.927 75.068 \n", + "11 4.5 2.703 0.966 69.757 \n", + "12 0.0 11.538 1.027 77.923 \n", + "13 6.0 3.333 1.114 79.100 \n", + "14 0.0 0.000 1.005 78.971 \n", + "15 3.0 6.061 0.897 76.455 \n", + "16 3.0 2.128 0.889 64.064 \n", + "17 9.5 0.000 0.786 55.647 \n", + "18 1.5 12.000 0.948 70.720 \n", + "19 4.0 0.000 0.828 54.875 \n", + "20 5.0 3.125 0.901 69.594 \n", + "21 1.5 0.000 0.872 67.200 \n", + "22 2.0 0.000 0.786 64.150 \n", + "23 8.0 0.000 0.933 62.800 \n", + "24 3.0 10.811 1.086 77.108 \n", + "25 4.0 2.941 0.900 71.206 \n", + "26 10.0 0.000 0.845 69.511 \n", + "27 5.5 0.000 0.946 71.667 \n", + "28 9.0 0.000 0.822 54.545 \n", + "29 -6.0 38.235 1.239 83.559 \n", + ".. ... ... ... ... 
\n", + "728 1.0 4.167 0.931 67.833 \n", + "729 1.0 7.407 0.930 81.259 \n", + "730 0.0 25.000 1.119 79.667 \n", + "731 -2.0 17.500 1.068 84.100 \n", + "732 1.0 4.545 0.830 85.091 \n", + "733 1.0 11.538 1.050 70.885 \n", + "734 0.0 13.333 0.925 72.533 \n", + "735 0.5 11.111 1.135 80.778 \n", + "736 -0.5 13.333 0.957 75.200 \n", + "737 5.0 10.204 0.947 78.184 \n", + "738 -1.0 19.231 1.065 91.077 \n", + "739 -1.5 17.647 1.119 82.147 \n", + "740 6.5 3.704 0.909 76.111 \n", + "741 5.0 0.000 0.836 80.097 \n", + "742 -3.5 13.333 1.115 75.289 \n", + "743 4.0 8.333 0.928 79.000 \n", + "744 10.5 17.000 1.091 82.160 \n", + "745 5.5 12.821 0.996 67.513 \n", + "746 -2.0 19.048 1.030 85.333 \n", + "747 -2.5 20.513 1.099 82.795 \n", + "748 -2.0 25.000 1.178 86.417 \n", + "749 -4.0 21.875 1.021 66.188 \n", + "750 2.0 0.000 1.054 86.462 \n", + "751 -2.0 14.894 1.042 83.255 \n", + "752 -8.0 28.261 1.221 82.109 \n", + "753 -1.5 16.000 1.100 82.820 \n", + "754 -1.0 18.182 1.085 73.455 \n", + "755 -1.0 19.048 1.108 82.190 \n", + "756 -1.0 7.143 0.955 76.786 \n", + "757 -7.0 17.143 1.078 84.186 \n", + "\n", + " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", + "0 0.987 -4.833 0 0.382 \n", + "1 0.931 -0.584 0 0.320 \n", + "2 1.039 -5.664 0 0.164 \n", + "3 0.982 -5.423 0 2.010 \n", + "4 0.976 -2.002 0 2.758 \n", + "5 0.950 -1.878 0 3.090 \n", + "6 1.010 -3.225 0 3.172 \n", + "7 0.977 -2.509 0 2.543 \n", + "8 0.963 -1.682 0 2.990 \n", + "9 0.998 -4.943 0 2.985 \n", + "10 0.989 -3.118 0 3.493 \n", + "11 0.972 -3.896 1 3.714 \n", + "12 0.981 -3.954 1 3.679 \n", + "13 1.024 -2.437 0 3.988 \n", + "14 1.102 1.544 0 4.143 \n", + "15 0.955 -4.032 1 4.310 \n", + "16 1.000 0.583 0 4.097 \n", + "17 0.955 -0.577 0 3.816 \n", + "18 0.956 -3.559 1 3.982 \n", + "19 1.048 -2.853 0 4.293 \n", + "20 0.995 -1.677 0 3.876 \n", + "21 0.972 -5.392 0 4.471 \n", + "22 0.969 -4.706 0 3.929 \n", + "23 1.008 -4.170 0 4.104 \n", + "24 1.010 -2.112 0 4.208 \n", + "25 0.967 -3.963 0 4.126 \n", + "26 
0.975 -2.049 0 3.977 \n", + "27 1.023 -4.982 0 3.439 \n", + "28 0.993 -3.955 0 3.562 \n", + "29 1.002 0.108 1 3.317 \n", + ".. ... ... ... ... \n", + "728 0.936 -0.775 0 14.993 \n", + "729 0.985 -2.859 0 14.993 \n", + "730 1.023 -0.379 0 14.986 \n", + "731 0.995 -1.328 0 14.943 \n", + "732 0.996 -3.729 0 14.982 \n", + "733 1.015 -2.530 0 14.981 \n", + "734 0.993 -2.093 0 18.279 \n", + "735 1.034 -1.448 0 14.951 \n", + "736 0.969 -1.257 0 17.799 \n", + "737 0.986 -3.313 0 15.300 \n", + "738 1.004 0.520 0 15.214 \n", + "739 1.005 -1.477 0 15.321 \n", + "740 1.009 -2.840 0 15.666 \n", + "741 0.973 -2.795 0 15.110 \n", + "742 1.027 -1.292 0 15.058 \n", + "743 0.960 -3.607 0 15.091 \n", + "744 1.003 -2.414 0 15.270 \n", + "745 0.991 -4.674 0 14.989 \n", + "746 0.969 -1.162 0 15.458 \n", + "747 1.014 -0.965 0 15.715 \n", + "748 1.013 1.322 0 22.989 \n", + "749 0.994 -0.523 0 15.499 \n", + "750 1.051 -0.815 0 20.866 \n", + "751 0.978 -1.303 0 15.260 \n", + "752 1.014 -0.153 0 15.355 \n", + "753 0.991 -1.987 0 15.185 \n", + "754 1.027 -0.745 0 16.550 \n", + "755 1.033 -1.789 0 16.112 \n", + "756 1.023 1.141 0 20.630 \n", + "757 1.009 -0.066 0 17.168 \n", + "\n", + " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 \n", + "0 74.556 7.225 1.234 \n", + "1 56.056 4.942 1.853 \n", + "2 47.000 5.969 1.174 \n", + "3 69.222 5.462 1.138 \n", + "4 66.000 5.582 1.453 \n", + "5 72.000 5.779 1.844 \n", + "6 76.722 5.664 1.215 \n", + "7 72.000 4.251 1.560 \n", + "8 66.000 5.175 1.514 \n", + "9 77.444 5.626 1.621 \n", + "10 76.389 6.047 1.126 \n", + "11 76.444 5.492 1.445 \n", + "12 78.056 7.222 1.054 \n", + "13 75.556 6.667 1.079 \n", + "14 78.556 4.472 1.280 \n", + "15 81.222 6.207 1.506 \n", + "16 74.667 5.097 1.302 \n", + "17 61.667 4.829 2.026 \n", + "18 66.500 7.024 1.050 \n", + "19 54.875 5.229 1.651 \n", + "20 58.500 5.278 1.486 \n", + "21 67.200 6.524 0.986 \n", + "22 63.722 6.941 1.040 \n", + "23 62.667 4.870 1.547 \n", + "24 78.056 5.748 1.249 \n", + "25 72.611 6.828 1.507 \n", + "26 
70.722 4.891 1.543 \n", + "27 71.667 5.953 1.108 \n", + "28 55.500 6.086 1.114 \n", + "29 91.500 5.723 1.055 \n", + ".. ... ... ... \n", + "728 68.667 6.047 1.125 \n", + "729 80.778 6.012 1.051 \n", + "730 77.667 6.042 1.137 \n", + "731 90.000 6.081 1.484 \n", + "732 85.833 5.612 1.165 \n", + "733 75.722 5.337 1.327 \n", + "734 72.533 5.525 1.147 \n", + "735 80.778 5.411 1.079 \n", + "736 75.200 5.939 1.150 \n", + "737 77.111 6.239 1.050 \n", + "738 95.611 5.369 1.276 \n", + "739 81.333 5.771 1.016 \n", + "740 79.222 6.023 1.242 \n", + "741 82.611 5.923 1.245 \n", + "742 80.389 5.809 1.300 \n", + "743 80.944 5.417 1.316 \n", + "744 74.944 5.601 1.053 \n", + "745 67.944 6.928 1.166 \n", + "746 86.500 6.284 1.156 \n", + "747 86.333 5.589 1.127 \n", + "748 86.417 5.584 1.228 \n", + "749 71.222 5.939 1.080 \n", + "750 86.462 5.712 1.298 \n", + "751 85.167 5.718 1.164 \n", + "752 81.167 6.904 0.933 \n", + "753 85.333 7.053 1.325 \n", + "754 74.667 6.729 1.132 \n", + "755 79.667 6.036 1.219 \n", + "756 76.786 5.669 1.111 \n", + "757 76.611 6.688 1.305 \n", + "\n", + "[758 rows x 11 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = pd.read_csv(\"../AMP Data Sets/Test.csv\")\n", + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(758, 11)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
05.00.0000.95174.8420.975-3.66300.28273.4445.6611.0411
14.05.4050.93171.5950.957-4.01110.60068.2226.5371.4531
25.55.4050.87373.5950.961-2.51200.59369.4444.9341.7221
35.04.1670.89566.2500.999-1.36200.61467.2224.3161.3821
47.58.5370.93264.7200.979-2.09100.61672.9444.5401.5391
\n", + "
" + ], + "text/plain": [ + " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", + "0 5.0 0.000 0.951 74.842 \n", + "1 4.0 5.405 0.931 71.595 \n", + "2 5.5 5.405 0.873 73.595 \n", + "3 5.0 4.167 0.895 66.250 \n", + "4 7.5 8.537 0.932 64.720 \n", + "\n", + " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", + "0 0.975 -3.663 0 0.282 \n", + "1 0.957 -4.011 1 0.600 \n", + "2 0.961 -2.512 0 0.593 \n", + "3 0.999 -1.362 0 0.614 \n", + "4 0.979 -2.091 0 0.616 \n", + "\n", + " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n", + "0 73.444 5.661 1.041 1 \n", + "1 68.222 6.537 1.453 1 \n", + "2 69.444 4.934 1.722 1 \n", + "3 67.222 4.316 1.382 1 \n", + "4 72.944 4.540 1.539 1 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read in the data\n", + "data = pd.read_csv(\"../AMP Data Sets/AMP_TrainSet.csv\")\n", + "data.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze data by describing\n", + "\n", + "#### This step helped me know which features are in my dataset, are they categorical or numerical.\n", + "#### How many rows and columns does the dataset have\n", + "#### The data types for the various features\n", + "#### Checked whether the dataset has null or missing values\n", + "\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
count3038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.000000
mean2.0602378.5215200.97141073.6687600.994007-2.4329270.08854515.68323373.6508285.9113611.2352550.500000
std3.8199297.5866520.1074138.5274890.0313331.7072230.28413311.5756659.1660920.6936890.2100120.500082
min-16.0000000.0000000.68400042.7500000.866000-10.4320000.0000000.04100042.7780003.5330000.7850000.000000
25%0.0000002.5160000.89500068.2940000.974000-3.6060000.0000005.58750067.5560005.4592501.0820000.000000
50%2.0000007.1430000.96300074.0595000.994000-2.2965000.00000014.98850073.6970005.9255001.1840000.500000
75%4.00000013.1580001.04100079.3437501.011000-1.2832500.00000026.80775079.7780006.3820001.3510001.000000
max30.00000046.6670001.451000101.6820001.1960003.5760001.00000051.280000103.1670008.6620002.1920001.000000
\n", + "
# %% Descriptive statistics
# Summarize every column: count, mean, standard deviation, min/max and the
# 25% / 50% / 75% quartiles. NaN values are excluded from the statistics.
data.describe()

# %% Missing values
# Number of null values in each column.
data.isnull().sum()
# The saved output reports 0 nulls for every column, so no imputation or row
# dropping is needed before modelling.
# %% Class balance
# Bar chart of the number of rows per CLASS value.
class_sizes = data.groupby('CLASS').size()
class_sizes.plot(kind='bar')

# %% Pairwise Pearson correlations
# Computed once here and reused by the heat map and the CLASS column view.
pearson_corr = data.corr(method='pearson')
pearson_corr

# %% Correlation heat map
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(pearson_corr, ax=ax)

# %% Correlation of every attribute with the target
pearson_corr['CLASS']

# %% Skew of each attribute
skewness = data.skew()
skewness.plot(kind='bar')
# %% Histograms
# DataFrame.hist opens its own figure, so the size has to be given to it
# directly; a preceding plt.figure(...) only leaves an empty figure behind.
data.hist(figsize=(18, 18))
plt.tight_layout()
plt.show()

# %% Standardize the training data
from sklearn.preprocessing import StandardScaler

# Columns 0-10 are the input features, column 11 is the CLASS label.
array = data.values
X = array[:, 0:11]
Y = array[:, 11]

# The scaler learns mean / standard deviation from the training features only.
scaler = StandardScaler().fit(X)
rescaledX = scaler.transform(X)
print(rescaledX[0:5, :])

# %% Standardize the unlabeled test data
# Reuse the scaler fitted on the training features: fitting a second scaler on
# the test set would put train and test on different scales, so a model
# trained on rescaledX would see shifted inputs at prediction time.
# `array` is intentionally left pointing at the training matrix.
rescaledt = scaler.transform(test.values)
print(rescaledt[0:5, :])
# %% Recursive Feature Elimination
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Columns 0-10 are the input features, column 11 is the CLASS label.
array = data.values
X = array[:, 0:11]
Y = array[:, 11]

# Keep the 8 features that RFE ranks best for a logistic-regression model.
# n_features_to_select must be passed by keyword: it is keyword-only in
# current scikit-learn releases.
model = LogisticRegression()
rfe = RFE(model, n_features_to_select=8)
fit = rfe.fit(X, Y)
print("Num Features:", fit.n_features_)
print("Selected Features:", fit.support_)
print("Feature Ranking:", fit.ranking_)

# %% Preview of the selected feature columns
X[:, fit.support_][:5]

# %% Drop the eliminated features from the training frame
# NOTE(review): presumably these are the three columns RFE rejected above;
# verify against fit.support_ whenever the data or the estimator changes.
RFE_DROPPED_COLUMNS = ['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201']
drop = data.drop(columns=RFE_DROPPED_COLUMNS)
drop.head()

# %% Drop the same features from the test frame
drop_test = test.drop(columns=RFE_DROPPED_COLUMNS)
drop_test.head()
# %% Split into Train and Test Sets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split

# Columns 0-10 are the input features, column 11 is the CLASS label.
array = data.values
X = array[:, 0:11]
Y = array[:, 11]
test_size = 0.30
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed)

model = LogisticRegression()
model.fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: ", (result*100.0))

# MCC is measured on the held-out 30% *before* refitting; scoring the model
# on rows it was trained on would give an optimistic estimate.
mcc = matthews_corrcoef(Y_test, model.predict(X_test))
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report = pd.DataFrame({'CLASS': output})
report.index.name = "Index"
report['CLASS'] = report['CLASS'].map({0.0: False, 1.0: True})
report.to_csv("report.csv")

print(report['CLASS'].unique())
# Count by value instead of by position so the cell also works when the
# model predicts a single class.
predicted_true = int(report['CLASS'].eq(True).sum())
print('False: ', len(report) - predicted_true)
print('True: ', predicted_true)
# %% K-fold Cross Validation
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import (
    KFold, LeaveOneOut, cross_val_predict, cross_val_score)

num_folds = 10  # number of folds to use
seed = 7        # reproducibility

# shuffle=True is required for random_state to have any effect (current
# scikit-learn raises otherwise). It also matters here because the rows are
# ordered by CLASS, so unshuffled folds would be dominated by a single class.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = LogisticRegression()
results = cross_val_score(model, X, Y, cv=kfold)
print("Accuracy:", (results.mean()*100.0, results.std()*100.0))

# MCC from out-of-fold predictions: every row is predicted by a model that
# never saw it during training.
mcc = matthews_corrcoef(Y, cross_val_predict(model, X, Y, cv=kfold))
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report_kf = pd.DataFrame({'CLASS': output})
report_kf.index.name = "Index"
report_kf['CLASS'] = report_kf['CLASS'].map({0.0: False, 1.0: True})
report_kf.to_csv("report_kf.csv")

print(report_kf['CLASS'].unique())
# Count by value so a single-class prediction does not raise.
predicted_true = int(report_kf['CLASS'].eq(True).sum())
print('False: ', len(report_kf) - predicted_true)
print('True: ', predicted_true)

# %% Leave One Out Cross Validation
loocv = LeaveOneOut()
model = LogisticRegression()

# Each fold holds exactly one row, so its accuracy is 1.0 when that row is
# predicted correctly and 0.0 otherwise. Deriving the per-fold accuracies
# from the held-out predictions avoids running the whole LOOCV twice.
held_out_pred = cross_val_predict(model, X, Y, cv=loocv)
results = (held_out_pred == Y).astype(float)
print("Accuracy:", (results.mean()*100.0, results.std()*100.0))

mcc = matthews_corrcoef(Y, held_out_pred)
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report_l = pd.DataFrame({'CLASS': output})
report_l.index.name = "Index"
report_l['CLASS'] = report_l['CLASS'].map({0.0: False, 1.0: True})
report_l.to_csv("report_l.csv")

print(report_l['CLASS'].unique())
predicted_true = int(report_l['CLASS'].eq(True).sum())
print('False: ', len(report_l) - predicted_true)
print('True: ', predicted_true)
# %% Repeated Random Test-Train Splits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import ShuffleSplit, cross_validate

n_splits = 10
test_size = 0.30
seed = 7
shuffle_cv = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)
model = LogisticRegression()

# One pass over the splits yields both metrics, each measured on the held-out
# 30% of every split rather than on rows the model was trained on.
scores = cross_validate(
    model, X, Y, cv=shuffle_cv,
    scoring={'accuracy': 'accuracy', 'mcc': make_scorer(matthews_corrcoef)})
results = scores['test_accuracy']
print("Accuracy: " , (results.mean()*100.0, results.std()*100.0))

mcc = scores['test_mcc'].mean()
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report_rrt = pd.DataFrame({'CLASS': output})
report_rrt.index.name = "Index"
report_rrt['CLASS'] = report_rrt['CLASS'].map({0.0: False, 1.0: True})
report_rrt.to_csv("report_rrt.csv")

print(report_rrt['CLASS'].unique())
# Count by value so a single-class prediction does not raise.
predicted_true = int(report_rrt['CLASS'].eq(True).sum())
print('False: ', len(report_rrt) - predicted_true)
print('True: ', predicted_true)
# %% Imports and shared helper for the algorithm spot-checks
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import KFold, cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier


def evaluate_and_report(model, features, target, unlabeled, csv_path, cv):
    """Cross-validate `model`, refit it on all rows and save its predictions.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn classifier (or pipeline). It is fitted in place
        on `features` / `target` after cross-validation.
    features, target : array-like
        Labelled training features and their 0.0 / 1.0 class labels.
    unlabeled : array-like
        Feature rows to predict; must have the same columns as `features`.
    csv_path : str
        Destination of the prediction report (index column "Index", boolean
        column "CLASS").
    cv : cross-validation splitter
        Splitter used for both accuracy and MCC.

    Returns
    -------
    tuple
        `(results, output, mcc, report)`: per-fold accuracies, raw predictions
        for `unlabeled`, mean per-fold MCC, and the report DataFrame.
    """
    # Both metrics come from held-out folds; scoring a model on the rows it
    # was fitted on would overstate its quality.
    scores = cross_validate(
        model, features, target, cv=cv,
        scoring={'accuracy': 'accuracy', 'mcc': make_scorer(matthews_corrcoef)})
    results = scores['test_accuracy']
    mcc = scores['test_mcc'].mean()
    print(results.mean())
    print('MCC:', mcc)

    model.fit(features, target)
    output = model.predict(unlabeled)

    report = pd.DataFrame({'CLASS': output})
    report.index.name = "Index"
    report['CLASS'] = report['CLASS'].map({0.0: False, 1.0: True})
    report.to_csv(csv_path)

    print(report['CLASS'].unique())
    # Count by value so a single-class prediction does not raise.
    predicted_true = int(report['CLASS'].eq(True).sum())
    print('False: ', len(report) - predicted_true)
    print('True: ', predicted_true)
    return results, output, mcc, report


# shuffle=True is required for random_state to take effect, and it keeps the
# class-ordered rows from producing single-class folds.
num_folds = 10
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=7)

# %% Logistic Regression on standardized data
# The scaler lives inside the pipeline, so it is fitted on the training part
# of every fold and the same training statistics are applied to the test file.
model = make_pipeline(StandardScaler(), LogisticRegression())
results, output, mcc, report_scaled = evaluate_and_report(
    model, X, Y, test.values, "report_scaled.csv", kfold)

# %% Logistic Regression Classification
# NOTE(review): the file name says XGB although this cell fits a logistic
# regression; the name is kept unchanged so existing references still work.
model = LogisticRegression()
results, output, mcc, my_report = evaluate_and_report(
    model, X, Y, test.values, "report_XGB.csv", kfold)

# %% Linear Discriminant Analysis
model = LinearDiscriminantAnalysis()
results, output, mcc, lda_report = evaluate_and_report(
    model, X, Y, test.values, "ldareport.csv", kfold)

# %% k-Nearest Neighbors
model = KNeighborsClassifier()
results, output, mcc, report_k = evaluate_and_report(
    model, X, Y, test.values, "report_k.csv", kfold)

# %% Naive Bayes on standardized data
model = make_pipeline(StandardScaler(), GaussianNB())
results, output, mcc, report_rebayes = evaluate_and_report(
    model, X, Y, test.values, "report_rebayes.csv", kfold)

# %% Naive Bayes on selected features
# `fit` is the fitted RFE selector from the feature-selection section; its
# boolean support_ mask picks the retained feature columns.
array = data.values
X = array[:, 0:11]
Y = array[:, 11]
selectedX = X[:, fit.support_]
array2 = test.values
selectedT = array2[:, fit.support_]

model = GaussianNB()
results, output, mcc, report_sel = evaluate_and_report(
    model, selectedX, Y, selectedT, "report_sel.csv", kfold)

# %% Naive Bayes on all features
model = GaussianNB()
results, output, mcc, report_bayes = evaluate_and_report(
    model, X, Y, test.values, "report_bayes.csv", kfold)

# %% Classification and Regression Trees
# Seeded so that tie-breaking between equally good splits is reproducible.
model = DecisionTreeClassifier(random_state=7)
results, output, mcc, report_tree = evaluate_and_report(
    model, X, Y, test.values, "report_tree.csv", kfold)
# %% Support Vector Machines
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC

# shuffle=True is required for random_state to take effect, and it keeps the
# class-ordered rows from producing single-class folds.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
model = SVC()

# Accuracy and MCC are both measured on held-out folds in a single pass.
scores = cross_validate(
    model, X, Y, cv=kfold,
    scoring={'accuracy': 'accuracy', 'mcc': make_scorer(matthews_corrcoef)})
results = scores['test_accuracy']
print(results.mean())

mcc = scores['test_mcc'].mean()
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report_svm = pd.DataFrame({'CLASS': output})
report_svm.index.name = "Index"
report_svm['CLASS'] = report_svm['CLASS'].map({0.0: False, 1.0: True})
report_svm.to_csv("report_svm.csv")

print(report_svm['CLASS'].unique())
# Count by value so a single-class prediction does not raise.
predicted_true = int(report_svm['CLASS'].eq(True).sum())
print('False: ', len(report_svm) - predicted_true)
print('True: ', predicted_true)
# %% AdaBoost Classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import KFold, cross_validate

# Rebuild the matrices from `data` instead of relying on whatever an earlier
# cell last stored in `array`.
array = data.values
X = array[:, 0:11]
Y = array[:, 11]

num_trees = 39
seed = 10

# shuffle=True is required for random_state to take effect, and it keeps the
# class-ordered rows from producing single-class folds.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)

# Accuracy and MCC are both measured on held-out folds in a single pass.
scores = cross_validate(
    model, X, Y, cv=kfold,
    scoring={'accuracy': 'accuracy', 'mcc': make_scorer(matthews_corrcoef)})
results = scores['test_accuracy']
print(results.mean())

mcc = scores['test_mcc'].mean()
print('MCC:', mcc)

# Refit on every labelled row, then predict the unlabeled test file.
model.fit(X, Y)
output = model.predict(test.values)

report_ada = pd.DataFrame({'CLASS': output})
report_ada.index.name = "Index"
report_ada['CLASS'] = report_ada['CLASS'].map({0.0: False, 1.0: True})
report_ada.to_csv("report_ada.csv")

print(report_ada['CLASS'].unique())
# Count by value so a single-class prediction does not raise.
predicted_true = int(report_ada['CLASS'].eq(True).sum())
print('False: ', len(report_ada) - predicted_true)
print('True: ', predicted_true)
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bagged Decision Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bagged Decision Trees for Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import BaggingClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "#split the data in portions\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "seed = 7 #duplication\n", + "\n", + "#split according to cross validation\n", + "kfold = KFold(n_splits=10, random_state=seed)\n", + "\n", + "#initialize the model\n", + "cart = DecisionTreeClassifier()\n", + "\n", + "#bagging\n", + "num_trees = 250\n", + "\n", + "#model\n", + "model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)\n", + "\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(model.predict(X),Y)\n", + "print('MCC:',mcc)\n", + " \n", + "report_bag = pd.DataFrame(output)\n", + "report_bag.columns = ['CLASS']\n", + "report_bag.index.name = \"Index\"\n", + "report_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\n", + "report_bag.to_csv(\"report_bag.csv\")\n", + "\n", + "\n", + "print(report_bag['CLASS'].unique())\n", + "print('False: ',report_bag.groupby('CLASS').size()[0].sum())\n", + "print('True: ',report_bag.groupby('CLASS').size()[1].sum())\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Random Forest Classification\n", + "from pandas import 
read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# features are columns 0-10 of `array`, the target is column 11\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 1000\n", + "\n", + "max_features = 3\n", + "\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "model.fit(X,Y)\n", + "output = model.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(Y, model.predict(X))  # scored on the training data, so optimistic\n", + "print('MCC:',mcc)\n", + " \n", + "report_rf = pd.DataFrame(output)\n", + "report_rf.columns = ['CLASS']\n", + "report_rf.index.name = \"Index\"\n", + "report_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\n", + "report_rf.to_csv(\"report_rf.csv\")\n", + "\n", + "\n", + "print(report_rf['CLASS'].unique())\n", + "print('False: ',(report_rf['CLASS'] == False).sum())\n", + "print('True: ',(report_rf['CLASS'] == True).sum())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra Trees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import ExtraTreesClassifier\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "\n", + "num_trees = 100\n", + "max_features = 7\n", + "\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "\n", + "model = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n", + "\n", + "results = cross_val_score(model, X, Y, cv=kfold)\n", + "\n", + "print(results.mean())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Voting Ensemble" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Voting Ensemble for Classification\n", + "from pandas import read_csv\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from xgboost import XGBClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import VotingClassifier\n", + "\n", + "\n", + "X = array[:,0:11]\n", + "Y = array[:,11]\n", + "kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "\n", + "# create the sub models\n", + "estimators = []\n", + "model1 = LogisticRegression()\n", + "estimators.append(('logistic', model1))\n", + "\n", + "model2 = DecisionTreeClassifier()\n", + "estimators.append(('cart', model2))\n", + "\n", + "model3 = SVC()\n", + "estimators.append(('svm', model3))\n", + "\n", + "model4 = XGBClassifier()\n", + "estimators.append(('xgb', model4))\n", + "\n", + "model5 = RandomForestClassifier()\n", + "estimators.append(('rfc', model5))\n", + "\n", + "# create the ensemble model\n", + "ensemble = VotingClassifier(estimators)\n", + "results = cross_val_score(ensemble, X, Y, cv=kfold)\n", + "print(results.mean())\n", + "\n", + "# fit the voting ensemble itself on the full training set, then predict the test set\n", + "ensemble.fit(X,Y)\n", + "output = ensemble.predict(test.values)\n", + "\n", + "from sklearn.metrics import matthews_corrcoef\n", + "mcc = matthews_corrcoef(Y, ensemble.predict(X))  # scored on the training data, so optimistic\n", + "print('MCC:',mcc)\n", + " \n", + "report_v = pd.DataFrame(output)\n", + "report_v.columns = ['CLASS']\n", + "report_v.index.name = \"Index\"\n", + "report_v['CLASS']=report_v['CLASS'].map({0.0:False, 1.0:True})\n", + "report_v.to_csv(\"report_v.csv\")\n", + "\n", + "\n", + "print(report_v['CLASS'].unique())\n", + "print('False: ',(report_v['CLASS'] == False).sum())\n", + "print('True: ',(report_v['CLASS'] == True).sum())\n", + "\n", + 
"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing the Algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# prepare models and add them to a list\n", + "from matplotlib import pyplot\n", + "\n", + "models = []\n", + "models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\n", + "models.append(('LDA', LinearDiscriminantAnalysis()))\n", + "models.append(('KNN', KNeighborsClassifier()))\n", + "models.append(('CART', DecisionTreeClassifier()))\n", + "models.append(('NB', GaussianNB()))\n", + "models.append(('SVM', SVC(gamma='auto')))\n", + "models.append(('ETC', ExtraTreesClassifier()))\n", + "models.append(('RFC', RandomForestClassifier()))\n", + "\n", + "# evaluate each model in turn on the same shuffled 10-fold split\n", + "results = []\n", + "names = []\n", + "scoring = 'accuracy'\n", + "\n", + "for name, model in models:\n", + "    kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n", + "    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", + "    results.append(cv_results)\n", + "    names.append(name)\n", + "    msg = (name, cv_results.mean(), cv_results.std())\n", + "    print(msg)\n", + "\n", + "# boxplot algorithm comparison\n", + "fig = pyplot.figure()\n", + "fig.suptitle('Algorithm Comparison')\n", + "ax = fig.add_subplot(111)\n", + "pyplot.boxplot(results)\n", + "ax.set_xticklabels(names)\n", + "pyplot.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 2b4af7510c65f8e1330beb60d36d65d19b3e5c42 Mon Sep 17 00:00:00 2001 From: nashim-kayaga <56426944+nashim-kayaga@users.noreply.github.com> Date: Mon, 16 Mar 2020 13:15:53 +0300 Subject: [PATCH 3/4] final notebook --- Assignment Colab/KAYAGA NASHIM MILVAT.ipynb | 1678 +------------------ 1 file changed, 52 insertions(+), 1626 deletions(-) diff --git a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb index 36296d9..d34411d 100644 --- a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb +++ b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" @@ -21,9 +21,9 @@ "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", "\n", "import os\n", - "#for dirname, _, filenames in os.walk('/kaggle/input'):\n", - "# for filename in filenames:\n", - "# print(os.path.join(dirname, filename))\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))\n", " \n", "\n", "# Any results 
you write to the current directory are saved as output." @@ -37,16 +37,12 @@ "#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n", "#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n", "#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n", - "#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2\n", - "\n", - "\n", - "
\n", - " Please put these at the bottom." + "#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" @@ -59,1279 +55,31 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104
04.03.7040.87373.5190.987-4.83300.38274.5567.2251.234
14.04.4440.89262.4440.931-0.58400.32056.0564.9421.853
22.00.0000.90147.0001.039-5.66400.16447.0005.9691.174
34.50.0000.86969.2220.982-5.42302.01069.2225.4621.138
4-4.021.5911.06171.6820.976-2.00202.75866.0005.5821.453
54.56.9770.89568.5120.950-1.87803.09072.0005.7791.844
612.03.1751.02274.4601.010-3.22503.17276.7225.6641.215
71.53.7040.93269.5190.977-2.50902.54372.0004.2511.560
83.03.3330.90359.5000.963-1.68202.99066.0005.1751.514
94.00.0000.87372.7920.998-4.94302.98577.4445.6261.621
1011.08.2190.92775.0680.989-3.11803.49376.3896.0471.126
114.52.7030.96669.7570.972-3.89613.71476.4445.4921.445
120.011.5381.02777.9230.981-3.95413.67978.0567.2221.054
136.03.3331.11479.1001.024-2.43703.98875.5566.6671.079
140.00.0001.00578.9711.1021.54404.14378.5564.4721.280
153.06.0610.89776.4550.955-4.03214.31081.2226.2071.506
163.02.1280.88964.0641.0000.58304.09774.6675.0971.302
179.50.0000.78655.6470.955-0.57703.81661.6674.8292.026
181.512.0000.94870.7200.956-3.55913.98266.5007.0241.050
194.00.0000.82854.8751.048-2.85304.29354.8755.2291.651
205.03.1250.90169.5940.995-1.67703.87658.5005.2781.486
211.50.0000.87267.2000.972-5.39204.47167.2006.5240.986
222.00.0000.78664.1500.969-4.70603.92963.7226.9411.040
238.00.0000.93362.8001.008-4.17004.10462.6674.8701.547
243.010.8111.08677.1081.010-2.11204.20878.0565.7481.249
254.02.9410.90071.2060.967-3.96304.12672.6116.8281.507
2610.00.0000.84569.5110.975-2.04903.97770.7224.8911.543
275.50.0000.94671.6671.023-4.98203.43971.6675.9531.108
289.00.0000.82254.5450.993-3.95503.56255.5006.0861.114
29-6.038.2351.23983.5591.0020.10813.31791.5005.7231.055
....................................
7281.04.1670.93167.8330.936-0.775014.99368.6676.0471.125
7291.07.4070.93081.2590.985-2.859014.99380.7786.0121.051
7300.025.0001.11979.6671.023-0.379014.98677.6676.0421.137
731-2.017.5001.06884.1000.995-1.328014.94390.0006.0811.484
7321.04.5450.83085.0910.996-3.729014.98285.8335.6121.165
7331.011.5381.05070.8851.015-2.530014.98175.7225.3371.327
7340.013.3330.92572.5330.993-2.093018.27972.5335.5251.147
7350.511.1111.13580.7781.034-1.448014.95180.7785.4111.079
736-0.513.3330.95775.2000.969-1.257017.79975.2005.9391.150
7375.010.2040.94778.1840.986-3.313015.30077.1116.2391.050
738-1.019.2311.06591.0771.0040.520015.21495.6115.3691.276
739-1.517.6471.11982.1471.005-1.477015.32181.3335.7711.016
7406.53.7040.90976.1111.009-2.840015.66679.2226.0231.242
7415.00.0000.83680.0970.973-2.795015.11082.6115.9231.245
742-3.513.3331.11575.2891.027-1.292015.05880.3895.8091.300
7434.08.3330.92879.0000.960-3.607015.09180.9445.4171.316
74410.517.0001.09182.1601.003-2.414015.27074.9445.6011.053
7455.512.8210.99667.5130.991-4.674014.98967.9446.9281.166
746-2.019.0481.03085.3330.969-1.162015.45886.5006.2841.156
747-2.520.5131.09982.7951.014-0.965015.71586.3335.5891.127
748-2.025.0001.17886.4171.0131.322022.98986.4175.5841.228
749-4.021.8751.02166.1880.994-0.523015.49971.2225.9391.080
7502.00.0001.05486.4621.051-0.815020.86686.4625.7121.298
751-2.014.8941.04283.2550.978-1.303015.26085.1675.7181.164
752-8.028.2611.22182.1091.014-0.153015.35581.1676.9040.933
753-1.516.0001.10082.8200.991-1.987015.18585.3337.0531.325
754-1.018.1821.08573.4551.027-0.745016.55074.6676.7291.132
755-1.019.0481.10882.1901.033-1.789016.11279.6676.0361.219
756-1.07.1430.95576.7861.0231.141020.63076.7865.6691.111
757-7.017.1431.07884.1861.009-0.066017.16876.6116.6881.305
\n", - "

758 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", - "0 4.0 3.704 0.873 73.519 \n", - "1 4.0 4.444 0.892 62.444 \n", - "2 2.0 0.000 0.901 47.000 \n", - "3 4.5 0.000 0.869 69.222 \n", - "4 -4.0 21.591 1.061 71.682 \n", - "5 4.5 6.977 0.895 68.512 \n", - "6 12.0 3.175 1.022 74.460 \n", - "7 1.5 3.704 0.932 69.519 \n", - "8 3.0 3.333 0.903 59.500 \n", - "9 4.0 0.000 0.873 72.792 \n", - "10 11.0 8.219 0.927 75.068 \n", - "11 4.5 2.703 0.966 69.757 \n", - "12 0.0 11.538 1.027 77.923 \n", - "13 6.0 3.333 1.114 79.100 \n", - "14 0.0 0.000 1.005 78.971 \n", - "15 3.0 6.061 0.897 76.455 \n", - "16 3.0 2.128 0.889 64.064 \n", - "17 9.5 0.000 0.786 55.647 \n", - "18 1.5 12.000 0.948 70.720 \n", - "19 4.0 0.000 0.828 54.875 \n", - "20 5.0 3.125 0.901 69.594 \n", - "21 1.5 0.000 0.872 67.200 \n", - "22 2.0 0.000 0.786 64.150 \n", - "23 8.0 0.000 0.933 62.800 \n", - "24 3.0 10.811 1.086 77.108 \n", - "25 4.0 2.941 0.900 71.206 \n", - "26 10.0 0.000 0.845 69.511 \n", - "27 5.5 0.000 0.946 71.667 \n", - "28 9.0 0.000 0.822 54.545 \n", - "29 -6.0 38.235 1.239 83.559 \n", - ".. ... ... ... ... 
\n", - "728 1.0 4.167 0.931 67.833 \n", - "729 1.0 7.407 0.930 81.259 \n", - "730 0.0 25.000 1.119 79.667 \n", - "731 -2.0 17.500 1.068 84.100 \n", - "732 1.0 4.545 0.830 85.091 \n", - "733 1.0 11.538 1.050 70.885 \n", - "734 0.0 13.333 0.925 72.533 \n", - "735 0.5 11.111 1.135 80.778 \n", - "736 -0.5 13.333 0.957 75.200 \n", - "737 5.0 10.204 0.947 78.184 \n", - "738 -1.0 19.231 1.065 91.077 \n", - "739 -1.5 17.647 1.119 82.147 \n", - "740 6.5 3.704 0.909 76.111 \n", - "741 5.0 0.000 0.836 80.097 \n", - "742 -3.5 13.333 1.115 75.289 \n", - "743 4.0 8.333 0.928 79.000 \n", - "744 10.5 17.000 1.091 82.160 \n", - "745 5.5 12.821 0.996 67.513 \n", - "746 -2.0 19.048 1.030 85.333 \n", - "747 -2.5 20.513 1.099 82.795 \n", - "748 -2.0 25.000 1.178 86.417 \n", - "749 -4.0 21.875 1.021 66.188 \n", - "750 2.0 0.000 1.054 86.462 \n", - "751 -2.0 14.894 1.042 83.255 \n", - "752 -8.0 28.261 1.221 82.109 \n", - "753 -1.5 16.000 1.100 82.820 \n", - "754 -1.0 18.182 1.085 73.455 \n", - "755 -1.0 19.048 1.108 82.190 \n", - "756 -1.0 7.143 0.955 76.786 \n", - "757 -7.0 17.143 1.078 84.186 \n", - "\n", - " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", - "0 0.987 -4.833 0 0.382 \n", - "1 0.931 -0.584 0 0.320 \n", - "2 1.039 -5.664 0 0.164 \n", - "3 0.982 -5.423 0 2.010 \n", - "4 0.976 -2.002 0 2.758 \n", - "5 0.950 -1.878 0 3.090 \n", - "6 1.010 -3.225 0 3.172 \n", - "7 0.977 -2.509 0 2.543 \n", - "8 0.963 -1.682 0 2.990 \n", - "9 0.998 -4.943 0 2.985 \n", - "10 0.989 -3.118 0 3.493 \n", - "11 0.972 -3.896 1 3.714 \n", - "12 0.981 -3.954 1 3.679 \n", - "13 1.024 -2.437 0 3.988 \n", - "14 1.102 1.544 0 4.143 \n", - "15 0.955 -4.032 1 4.310 \n", - "16 1.000 0.583 0 4.097 \n", - "17 0.955 -0.577 0 3.816 \n", - "18 0.956 -3.559 1 3.982 \n", - "19 1.048 -2.853 0 4.293 \n", - "20 0.995 -1.677 0 3.876 \n", - "21 0.972 -5.392 0 4.471 \n", - "22 0.969 -4.706 0 3.929 \n", - "23 1.008 -4.170 0 4.104 \n", - "24 1.010 -2.112 0 4.208 \n", - "25 0.967 -3.963 0 4.126 \n", - "26 
0.975 -2.049 0 3.977 \n", - "27 1.023 -4.982 0 3.439 \n", - "28 0.993 -3.955 0 3.562 \n", - "29 1.002 0.108 1 3.317 \n", - ".. ... ... ... ... \n", - "728 0.936 -0.775 0 14.993 \n", - "729 0.985 -2.859 0 14.993 \n", - "730 1.023 -0.379 0 14.986 \n", - "731 0.995 -1.328 0 14.943 \n", - "732 0.996 -3.729 0 14.982 \n", - "733 1.015 -2.530 0 14.981 \n", - "734 0.993 -2.093 0 18.279 \n", - "735 1.034 -1.448 0 14.951 \n", - "736 0.969 -1.257 0 17.799 \n", - "737 0.986 -3.313 0 15.300 \n", - "738 1.004 0.520 0 15.214 \n", - "739 1.005 -1.477 0 15.321 \n", - "740 1.009 -2.840 0 15.666 \n", - "741 0.973 -2.795 0 15.110 \n", - "742 1.027 -1.292 0 15.058 \n", - "743 0.960 -3.607 0 15.091 \n", - "744 1.003 -2.414 0 15.270 \n", - "745 0.991 -4.674 0 14.989 \n", - "746 0.969 -1.162 0 15.458 \n", - "747 1.014 -0.965 0 15.715 \n", - "748 1.013 1.322 0 22.989 \n", - "749 0.994 -0.523 0 15.499 \n", - "750 1.051 -0.815 0 20.866 \n", - "751 0.978 -1.303 0 15.260 \n", - "752 1.014 -0.153 0 15.355 \n", - "753 0.991 -1.987 0 15.185 \n", - "754 1.027 -0.745 0 16.550 \n", - "755 1.033 -1.789 0 16.112 \n", - "756 1.023 1.141 0 20.630 \n", - "757 1.009 -0.066 0 17.168 \n", - "\n", - " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 \n", - "0 74.556 7.225 1.234 \n", - "1 56.056 4.942 1.853 \n", - "2 47.000 5.969 1.174 \n", - "3 69.222 5.462 1.138 \n", - "4 66.000 5.582 1.453 \n", - "5 72.000 5.779 1.844 \n", - "6 76.722 5.664 1.215 \n", - "7 72.000 4.251 1.560 \n", - "8 66.000 5.175 1.514 \n", - "9 77.444 5.626 1.621 \n", - "10 76.389 6.047 1.126 \n", - "11 76.444 5.492 1.445 \n", - "12 78.056 7.222 1.054 \n", - "13 75.556 6.667 1.079 \n", - "14 78.556 4.472 1.280 \n", - "15 81.222 6.207 1.506 \n", - "16 74.667 5.097 1.302 \n", - "17 61.667 4.829 2.026 \n", - "18 66.500 7.024 1.050 \n", - "19 54.875 5.229 1.651 \n", - "20 58.500 5.278 1.486 \n", - "21 67.200 6.524 0.986 \n", - "22 63.722 6.941 1.040 \n", - "23 62.667 4.870 1.547 \n", - "24 78.056 5.748 1.249 \n", - "25 72.611 6.828 1.507 \n", - "26 
70.722 4.891 1.543 \n", - "27 71.667 5.953 1.108 \n", - "28 55.500 6.086 1.114 \n", - "29 91.500 5.723 1.055 \n", - ".. ... ... ... \n", - "728 68.667 6.047 1.125 \n", - "729 80.778 6.012 1.051 \n", - "730 77.667 6.042 1.137 \n", - "731 90.000 6.081 1.484 \n", - "732 85.833 5.612 1.165 \n", - "733 75.722 5.337 1.327 \n", - "734 72.533 5.525 1.147 \n", - "735 80.778 5.411 1.079 \n", - "736 75.200 5.939 1.150 \n", - "737 77.111 6.239 1.050 \n", - "738 95.611 5.369 1.276 \n", - "739 81.333 5.771 1.016 \n", - "740 79.222 6.023 1.242 \n", - "741 82.611 5.923 1.245 \n", - "742 80.389 5.809 1.300 \n", - "743 80.944 5.417 1.316 \n", - "744 74.944 5.601 1.053 \n", - "745 67.944 6.928 1.166 \n", - "746 86.500 6.284 1.156 \n", - "747 86.333 5.589 1.127 \n", - "748 86.417 5.584 1.228 \n", - "749 71.222 5.939 1.080 \n", - "750 86.462 5.712 1.298 \n", - "751 85.167 5.718 1.164 \n", - "752 81.167 6.904 0.933 \n", - "753 85.333 7.053 1.325 \n", - "754 74.667 6.729 1.132 \n", - "755 79.667 6.036 1.219 \n", - "756 76.786 5.669 1.111 \n", - "757 76.611 6.688 1.305 \n", - "\n", - "[758 rows x 11 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test = pd.read_csv(\"../AMP Data Sets/Test.csv\")\n", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\n", "test" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(758, 11)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "test.shape" ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
05.00.0000.95174.8420.975-3.66300.28273.4445.6611.0411
14.05.4050.93171.5950.957-4.01110.60068.2226.5371.4531
25.55.4050.87373.5950.961-2.51200.59369.4444.9341.7221
35.04.1670.89566.2500.999-1.36200.61467.2224.3161.3821
47.58.5370.93264.7200.979-2.09100.61672.9444.5401.5391
\n", - "
" - ], - "text/plain": [ - " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", - "0 5.0 0.000 0.951 74.842 \n", - "1 4.0 5.405 0.931 71.595 \n", - "2 5.5 5.405 0.873 73.595 \n", - "3 5.0 4.167 0.895 66.250 \n", - "4 7.5 8.537 0.932 64.720 \n", - "\n", - " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", - "0 0.975 -3.663 0 0.282 \n", - "1 0.957 -4.011 1 0.600 \n", - "2 0.961 -2.512 0 0.593 \n", - "3 0.999 -1.362 0 0.614 \n", - "4 0.979 -2.091 0 0.616 \n", - "\n", - " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n", - "0 73.444 5.661 1.041 1 \n", - "1 68.222 6.537 1.453 1 \n", - "2 69.444 4.934 1.722 1 \n", - "3 67.222 4.316 1.382 1 \n", - "4 72.944 4.540 1.539 1 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#read in the data\n", - "data = pd.read_csv(\"../AMP Data Sets/AMP_TrainSet.csv\")\n", + "data = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\n", "data.head(5)" ] }, @@ -1349,27 +97,14 @@ "#### This step helped me know which features are in my dataset, are they categorical or numerical.\n", "#### How many rows and columns does the dataset have\n", "#### The data types for the various features\n", - "#### Checked whether the dataset has null or missing values\n", - "\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
count3038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.000000
mean2.0602378.5215200.97141073.6687600.994007-2.4329270.08854515.68323373.6508285.9113611.2352550.500000
std3.8199297.5866520.1074138.5274890.0313331.7072230.28413311.5756659.1660920.6936890.2100120.500082
min-16.0000000.0000000.68400042.7500000.866000-10.4320000.0000000.04100042.7780003.5330000.7850000.000000
25%0.0000002.5160000.89500068.2940000.974000-3.6060000.0000005.58750067.5560005.4592501.0820000.000000
50%2.0000007.1430000.96300074.0595000.994000-2.2965000.00000014.98850073.6970005.9255001.1840000.500000
75%4.00000013.1580001.04100079.3437501.011000-1.2832500.00000026.80775079.7780006.3820001.3510001.000000
max30.00000046.6670001.451000101.6820001.1960003.5760001.00000051.280000103.1670008.6620002.1920001.000000
\n", - "
" - ], - "text/plain": [ - " FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n", - "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", - "mean 2.060237 8.521520 0.971410 73.668760 \n", - "std 3.819929 7.586652 0.107413 8.527489 \n", - "min -16.000000 0.000000 0.684000 42.750000 \n", - "25% 0.000000 2.516000 0.895000 68.294000 \n", - "50% 2.000000 7.143000 0.963000 74.059500 \n", - "75% 4.000000 13.158000 1.041000 79.343750 \n", - "max 30.000000 46.667000 1.451000 101.682000 \n", - "\n", - " FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n", - "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", - "mean 0.994007 -2.432927 0.088545 15.683233 \n", - "std 0.031333 1.707223 0.284133 11.575665 \n", - "min 0.866000 -10.432000 0.000000 0.041000 \n", - "25% 0.974000 -3.606000 0.000000 5.587500 \n", - "50% 0.994000 -2.296500 0.000000 14.988500 \n", - "75% 1.011000 -1.283250 0.000000 26.807750 \n", - "max 1.196000 3.576000 1.000000 51.280000 \n", - "\n", - " AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n", - "count 3038.000000 3038.000000 3038.000000 3038.000000 \n", - "mean 73.650828 5.911361 1.235255 0.500000 \n", - "std 9.166092 0.693689 0.210012 0.500082 \n", - "min 42.778000 3.533000 0.785000 0.000000 \n", - "25% 67.556000 5.459250 1.082000 0.000000 \n", - "50% 73.697000 5.925500 1.184000 0.500000 \n", - "75% 79.778000 6.382000 1.351000 1.000000 \n", - "max 103.167000 8.662000 2.192000 1.000000 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\n", "data.describe()" @@ -1640,32 +140,9 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FULL_Charge 0\n", - "FULL_AcidicMolPerc 0\n", - 
"FULL_AURR980107 0\n", - "FULL_DAYM780201 0\n", - "FULL_GEOR030101 0\n", - "FULL_OOBM850104 0\n", - "NT_EFC195 0\n", - "AS_MeanAmphiMoment 0\n", - "AS_DAYM780201 0\n", - "AS_FUKS010112 0\n", - "CT_RACS820104 0\n", - "CLASS 0\n", - "dtype: int64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#number of null values in each column\n", "data.isnull().sum()\n", @@ -1681,10 +158,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### needed to know how balanced the class values are\n", - "\n", - "
\n", - " What did you learn from all the steps above." + "#### needed to know how balanced the class values are" ] }, { @@ -1701,10 +175,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Its a good idea to review all the pairwise correlations of the attributes in the dataset because some machine learning algorithm like linear and logistic regression can suffer poor performance if there are highly correlated attributes in the dataset\n", - "\n", - "
\n", - " Good explanation" + "#### Its a good idea to review all the pairwise correlations of the attributes in the dataset because some machine learning algorithm like linear and logistic regression can suffer poor performance if there are highly correlated attributes in the dataset" ] }, { @@ -1780,8 +251,7 @@ "metadata": {}, "source": [ "## Histogram\n", - "#### This helps to understand each attribute of my dataset independently.\n", - "\n" + "#### This helps to understand each attribute of my dataset independently" ] }, { @@ -1935,8 +405,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers.\n", - "\n" + "#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers." ] }, { @@ -2050,7 +519,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Leave One Out Cross Validation" + "## Leave One Out Cross Validation\n", + "#### Its a special case of cross validation where the number of folds equals the number of instances in the data set thus the learning algorithm is applied once for each instance, using all other instances as a training set and using the selected instance as a single-item test set." ] }, { @@ -2174,14 +644,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Logistic Regression" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Logistic regression is best suited for binary classification: data sets where y = 0 or 1" + "## Logistic Regression\n", + "#### It's the appropriate regression analysis to conduct when the dependent variable is binary. 
So tried to use it on my data since it is binary and has no outliers" ] }, { @@ -2228,7 +692,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Logistic Regression Classification\n", + "# Logistic Regression Classification on untuched data\n", "\n", "num_folds = 10\n", "kfold = KFold(n_splits=10, random_state=7)\n", @@ -2264,6 +728,7 @@ "metadata": {}, "source": [ "## Linear Discriminant Analysis¶\n", + "#### Linear Discriminant Analysis(LDA) is a very common technique used for supervised classification problems. It reduces the dimensions by removing the reduntant and dependent features by transforming the features from higher dimensional space to a space with lower dimensions.\n", "\n" ] }, @@ -2312,7 +777,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### k-Nearest Neighbors" + "### k-Nearest Neighbors\n", + "#### Can solve both classification and regression problems. However, it is more widely used in classification problems so decided to try it and it gave me a very low score" ] }, { @@ -2592,28 +1058,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> # BoostingAlgorithms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### These seek to improve the prediction power by training a sequence of weak models, each compensating the weaknesses of its predecessors.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## AdaBoost" + "## BoostingAlgorithms\n", + "#### These seek to improve the prediction power by training a sequence of weak models, each compensating the weaknesses of its predecessors." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### This is specifically designed for classification problems" + "## AdaBoost\n", + "#### This is specifically designed for classification problems\n" ] }, { @@ -2740,7 +1194,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Random Forest" + "## Random Forest\n", + "#### The random forest combines hundreds or thousands of decision trees, trains each one on a slightly different set of the observations, splitting nodes in each tree considering a limited number of the features. The final predictions of the random forest are made by averaging the predictions of each individual tree.\n" ] }, { @@ -2979,35 +1434,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false } }, "nbformat": 4, From 81ab41d2bc12dacb90592cf3757cd37e1234356a Mon Sep 17 00:00:00 2001 From: nashim-kayaga <56426944+nashim-kayaga@users.noreply.github.com> Date: Mon, 16 Mar 2020 16:02:59 +0300 Subject: [PATCH 4/4] Updated Notebook --- Assignment Colab/KAYAGA NASHIM MILVAT.ipynb | 1442 +------------------ 1 file changed, 1 insertion(+), 1441 deletions(-) diff --git a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb index d34411d..496a19d 100644 --- a/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb +++ b/Assignment Colab/KAYAGA NASHIM MILVAT.ipynb @@ -1,1441 +1 @@ -{ - "cells": [ - { - "cell_type": "code", - 
"execution_count": null, - "metadata": { - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" - }, - "outputs": [], - "source": [ - "# This Python 3 environment comes with many helpful analytics libraries installed\n", - "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n", - "# For example, here's several helpful packages to load in \n", - "\n", - "import numpy as np # linear algebra\n", - "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "# Input data files are available in the \"../input/\" directory.\n", - "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", - "\n", - "import os\n", - "for dirname, _, filenames in os.walk('/kaggle/input'):\n", - " for filename in filenames:\n", - " print(os.path.join(dirname, filename))\n", - " \n", - "\n", - "# Any results you write to the current directory are saved as output." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# References\n", - "#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n", - "#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n", - "#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n", - "#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", - "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" - }, - "outputs": [], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\n", - "test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#read in the data\n", - "data = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\n", - "data.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Analyze data by describing\n", - "\n", - "#### This step helped me know which features are in my dataset, are they categorical or numerical.\n", - "#### How many rows and columns does the dataset have\n", - "#### The data types for the various features\n", - "#### Checked whether the dataset has null or missing values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Check the dimensions to the number of rows and columns\n", - "data.shape" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\n", - "data.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#number of null values in each column\n", - "data.isnull().sum()\n", - "#since my data has no null values then its good to go" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### needed to know how balanced the class values are" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "data.groupby('CLASS').size().plot(kind='bar')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Its a good idea to review all the pairwise correlations of the attributes in the dataset because some machine learning algorithm like linear and logistic regression can suffer poor performance if there are highly correlated attributes in the dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.corr(method='pearson')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### heat map to show the correlation of the data; plots that show the interactions between multiple variables in the dataset\n", - "#### Correlation gives an indication of how related the changes are between two variables. If two variables change in the same direction they are positively correlated. 
If they change in opposite directions together (one goes up, one goes down), then they are negatively correlated. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8,8))\n", - "sns.heatmap(data.corr(method='pearson'))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### also checked the corelation in regards to the class since am trying to build a ML agorithm for that class" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "data.corr(method='pearson')['CLASS']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Most of my variables are positively skewed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - " data.skew().plot(kind='bar')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## understanding data with visualization\n", - "#### Data can be visualised in many ways that is univariate plots and multivariate plots #### Used the Histogram for univariate plot as shown below and the correlation matrix plot as the multivariate plot as shown above" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Histogram\n", - "#### This helps to understand each attribute of my dataset independently" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data pre-processing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(18,18))\n", - "data.hist()\n", - "plt.subplots_adjust(bottom=3, right=2, top=5)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Standardize data\n", - "#### Standardization is a useful technique to transform attributes with a Gaussian distribution and differing means and 
standard deviations to a standard Gaussian distribution with a mean of 0 and a standard deviation of 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.preprocessing import StandardScaler\n", - "array = data.values\n", - "#separate array into input and output components\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "scaler = StandardScaler().fit(X)\n", - "rescaledX = scaler.transform(X)\n", - "# summarize transformed data\n", - "#set_printoptions(precision=3)\n", - "print(rescaledX[0:5,:])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "array = test.values\n", - "scaler = StandardScaler().fit(array)\n", - "rescaledt = scaler.transform(array)\n", - "# summarize transformed data\n", - "#set_printoptions(precision=3)\n", - "print(rescaledt[0:5,:])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Feature selection\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### it's the process of selecting a subset of relevant features for use in model construction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Chose Recursive Feature Elimination\n", - "#### This is an automatic feature selection technique\n", - "#### Used logistic regression it is a good baseline as it is fast to train and predict and scales well.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.feature_selection import RFE\n", - "from sklearn.linear_model import LogisticRegression\n", - "\n", - "array = data.values\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "# feature extraction\n", - "model = LogisticRegression()\n", - "rfe = RFE(model,8)\n", - "fit = rfe.fit(X,Y)\n", - "print(\"Num Features:\", fit.n_features_)\n", - "print(\"Selected Features:\", fit.support_)\n", 
- "print(\"Feature Ranking:\", fit.ranking_)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[:,fit.support_]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "drop=data.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\n", - "drop" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "drop_test = test.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\n", - "drop_test" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. #### Decided to first use all the first\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluate the Performance of Machine Learning Algorithms with Resampling¶\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Split into Train and Test Sets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### This algorithm evaluation technique is very fast. It is ideal for large datasets where there is strong evidence that both splits of the data are representative of the underlying problem. 
Because of the speed, it is useful to use this approach when the algorithm you are investigating is slow to train.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "\n", - "array = data.values\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "test_size = 0.30\n", - "seed = 7\n", - "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\n", - "random_state=seed)\n", - "model = LogisticRegression()\n", - "model.fit(X_train, Y_train)\n", - "result = model.score(X_test, Y_test)\n", - "print(\"Accuracy: \", (result*100.0))\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report = pd.DataFrame(output)\n", - "report.columns = ['CLASS']\n", - "report.index.name = \"Index\"\n", - "report['CLASS']=report['CLASS'].map({0.0:False, 1.0:True})\n", - "report.to_csv(\"report.csv\")\n", - "\n", - "print(report['CLASS'].unique())\n", - "print('False: ',report.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report.groupby('CLASS').size()[1].sum())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## K-fold Cross Validation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### It is more accurate because the algorithm is trained and evaluated multiple times on different data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import KFold\n", - "from sklearn.model_selection import cross_val_score\n", - "\n", - "num_folds = 10 #number of folds to use\n", - "seed = 7 #reproducibility\n", - "\n", - "kfold = KFold(n_splits=num_folds, random_state=seed)\n", - "model = LogisticRegression()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "\n", - "print(f\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_kf = pd.DataFrame(output)\n", - "report_kf.columns = ['CLASS']\n", - "report_kf.index.name = \"Index\"\n", - "report_kf['CLASS']=report_kf['CLASS'].map({0.0:False, 1.0:True})\n", - "report_kf.to_csv(\"report_kf.csv\")\n", - "\n", - "print(report_kf['CLASS'].unique())\n", - "print('False: ',report_kf.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_kf.groupby('CLASS').size()[1].sum())\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Leave One Out Cross Validation\n", - "#### Its a special case of cross validation where the number of folds equals the number of instances in the data set thus the learning algorithm is applied once for each instance, using all other instances as a training set and using the selected instance as a single-item test set." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import LeaveOneOut\n", - "from sklearn.model_selection import cross_val_score\n", - "\n", - "num_folds = 10\n", - "loocv = LeaveOneOut()\n", - "model = LogisticRegression()\n", - "results = cross_val_score(model, X, Y, cv=loocv)\n", - "print(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_l = pd.DataFrame(output)\n", - "report_l.columns = ['CLASS']\n", - "report_l.index.name = \"Index\"\n", - "report_l['CLASS']=report_l['CLASS'].map({0.0:False, 1.0:True})\n", - "report_l.to_csv(\"report_l.csv\")\n", - "\n", - "print(report_l['CLASS'].unique())\n", - "print('False: ',report_l.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_l.groupby('CLASS').size()[1].sum())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Repeated Random Test-Train Splits" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Creates a random split of the data like the train/test split , but repeats the process of splitting and evaluation of the algorithm multiple times, like cross validation. Repeated random splits can be useful intermediates when trying to balance variance in the estimated performance, model training speed and dataset size\n", - "#### In this I prefered using Repeated Random Test_Train Splits because when you look at the dataset the zeros are one side and the ones on the otherside in the 'class' column. 
So I would prefer to first shuffle the data and then split it to reduce on the bias" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import ShuffleSplit\n", - "from sklearn.model_selection import cross_val_score\n", - "\n", - "n_splits = 10\n", - "test_size = 0.30\n", - "seed = 7\n", - "kfold = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)\n", - "model = LogisticRegression()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(\"Accuracy: \" , (results.mean()*100.0, results.std()*100.0))\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_rrt = pd.DataFrame(output)\n", - "report_rrt.columns = ['CLASS']\n", - "report_rrt.index.name = \"Index\"\n", - "report_rrt['CLASS']=report_rrt['CLASS'].map({0.0:False, 1.0:True})\n", - "report_rrt.to_csv(\"report_rrt.csv\")\n", - "\n", - "print(report_rrt['CLASS'].unique())\n", - "print('False: ',report_rrt.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_rrt.groupby('CLASS').size()[1].sum())\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Machine Learning Algorithm Performance Metrics" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Algorithms Overview\n", - "### linear machine learning algorithms:\n", - "\n", - " Logistic Regression.\n", - " Linear Discriminant Analysis.\n", - "### onlinear machine learning algorithms\n", - "\n", - " k-Nearest Neighbors.\n", - " Naive Bayes.\n", - " Classication and Regression Trees.\n", - " Support Vector Machines.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Linear Machine Learning Algorithms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "## Logistic Regression\n", - "#### It's the appropriate regression analysis to conduct when the dependent variable is binary. So tried to use it on my data since it is binary and has no outliers" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using standardized data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Logistic regression on standardized data\n", - "num_folds = 10\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = LogisticRegression()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "model.fit(rescaledX,Y)\n", - "output = model.predict(rescaledt)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_scaled = pd.DataFrame(output)\n", - "report_scaled.columns = ['CLASS']\n", - "report_scaled.index.name = \"Index\"\n", - "report_scaled['CLASS']=report_scaled['CLASS'].map({0.0:False, 1.0:True})\n", - "report_scaled.to_csv(\"report_scaled.csv\")\n", - "\n", - "print(report_scaled['CLASS'].unique())\n", - "print('False: ',report_scaled.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_scaled.groupby('CLASS').size()[1].sum())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Logistic Regression Classification on untuched data\n", - "\n", - "num_folds = 10\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = LogisticRegression()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "my_report = pd.DataFrame(output)\n", - 
"my_report.columns = ['CLASS']\n", - "my_report.index.name = \"Index\"\n", - "my_report['CLASS']=my_report['CLASS'].map({0.0:False, 1.0:True})\n", - "my_report.to_csv(\"report_XGB.csv\")\n", - "\n", - "print(my_report['CLASS'].unique())\n", - "print('False: ',my_report.groupby('CLASS').size()[0].sum())\n", - "print('True: ',my_report.groupby('CLASS').size()[1].sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Linear Discriminant Analysis¶\n", - "#### Linear Discriminant Analysis(LDA) is a very common technique used for supervised classification problems. It reduces the dimensions by removing the reduntant and dependent features by transforming the features from higher dimensional space to a space with lower dimensions.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", - "\n", - "num_folds = 10\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = LinearDiscriminantAnalysis()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "lda_report = pd.DataFrame(output)\n", - "lda_report.columns = ['CLASS']\n", - "lda_report.index.name = \"Index\"\n", - "lda_report['CLASS']=lda_report['CLASS'].map({0.0:False, 1.0:True})\n", - "lda_report.to_csv(\"ldareport.csv\")\n", - "\n", - "print(lda_report['CLASS'].unique())\n", - "print('False: ',lda_report.groupby('CLASS').size()[0].sum())\n", - "print('True: ',lda_report.groupby('CLASS').size()[1].sum())\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nonlinear Machine 
Learning Algorithms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### k-Nearest Neighbors\n", - "#### Can solve both classification and regression problems. However, it is more widely used in classification problems so decided to try it and it gave me a very low score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.neighbors import KNeighborsClassifier\n", - "num_folds = 10\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = KNeighborsClassifier()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_k = pd.DataFrame(output)\n", - "report_k.columns = ['CLASS']\n", - "report_k.index.name = \"Index\"\n", - "report_k['CLASS']=report_k['CLASS'].map({0.0:False, 1.0:True})\n", - "report_k.to_csv(\"report_k.csv\")\n", - "\n", - "\n", - "print(report_k['CLASS'].unique())\n", - "print('False: ',report_k.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_k.groupby('CLASS').size()[1].sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Naive Bayes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tried using Standardised data on Naive Bayes\n", - "\n", - "### When I predicted Naive Bayes on Standardised data gave me a score of 0.98235, after feature selection it gave 0.90 and on unstandardised data it gave a score of 0.9959" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Naive Bayes on standardised data\n", - "from sklearn.naive_bayes import GaussianNB\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = 
GaussianNB()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(rescaledX,Y)\n", - "output = model.predict(rescaledt)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_rebayes = pd.DataFrame(output)\n", - "report_rebayes.columns = ['CLASS']\n", - "report_rebayes.index.name = \"Index\"\n", - "report_rebayes['CLASS']=report_rebayes['CLASS'].map({0.0:False, 1.0:True})\n", - "report_rebayes.to_csv(\"report_rebayes.csv\")\n", - "\n", - "\n", - "print(report_rebayes['CLASS'].unique())\n", - "print('False: ',report_rebayes.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_rebayes.groupby('CLASS').size()[1].sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Naive Bayes on selected features" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Naive Bayes on selected features\n", - "\n", - "array = data.values\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "\n", - "selectedX = X[:,fit.support_]\n", - "\n", - "array2 =test.values\n", - "selectedT = array2[:,fit.support_]\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = GaussianNB()\n", - "results = cross_val_score(model, selectedX, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(selectedX,Y)\n", - "output = model.predict(selectedT)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(selectedX),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_sel = pd.DataFrame(output)\n", - "report_sel.columns = ['CLASS']\n", - "report_sel.index.name = \"Index\"\n", - "report_sel['CLASS']=report_sel['CLASS'].map({0.0:False, 1.0:True})\n", - "report_sel.to_csv(\"report_sel.csv\")\n", - "\n", - "\n", - "print(report_sel['CLASS'].unique())\n", - 
"print('False: ',report_sel.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_sel.groupby('CLASS').size()[1].sum())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.naive_bayes import GaussianNB\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = GaussianNB()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_bayes = pd.DataFrame(output)\n", - "report_bayes.columns = ['CLASS']\n", - "report_bayes.index.name = \"Index\"\n", - "report_bayes['CLASS']=report_bayes['CLASS'].map({0.0:False, 1.0:True})\n", - "report_bayes.to_csv(\"report_bayes.csv\")\n", - "\n", - "\n", - "print(report_bayes['CLASS'].unique())\n", - "print('False: ',report_bayes.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_bayes.groupby('CLASS').size()[1].sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Classiffication and Regression Trees" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### used for classification or regression predictive modeling problems" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.tree import DecisionTreeClassifier\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = DecisionTreeClassifier()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_tree = 
pd.DataFrame(output)\n", - "report_tree.columns = ['CLASS']\n", - "report_tree.index.name = \"Index\"\n", - "report_tree['CLASS']=report_tree['CLASS'].map({0.0:False, 1.0:True})\n", - "report_tree.to_csv(\"report_tree.csv\")\n", - "\n", - "\n", - "print(report_tree['CLASS'].unique())\n", - "print('False: ',report_tree.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_tree.groupby('CLASS').size()[1].sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Support Vector Machines " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### A support vector machine (SVM) is a supervised machine learning model that uses classification algorithms for two-group classification problems" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.svm import SVC\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = SVC()\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_svm = pd.DataFrame(output)\n", - "report_svm.columns = ['CLASS']\n", - "report_svm.index.name = \"Index\"\n", - "report_svm['CLASS']=report_svm['CLASS'].map({0.0:False, 1.0:True})\n", - "report_svm.to_csv(\"report_svm.csv\")\n", - "\n", - "\n", - "print(report_svm['CLASS'].unique())\n", - "print('False: ',report_svm.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_svm.groupby('CLASS').size()[1].sum())\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Combine Models Into Ensemble Predictions\n", - "\n", - "The three most popular methods for combining the predictions from different models are:\n", - " \n", - " Bagging\n", - " Boosting\n", - 
" Voting" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## BoostingAlgorithms\n", - "#### These seek to improve the prediction power by training a sequence of weak models, each compensating the weaknesses of its predecessors." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## AdaBoost\n", - "#### This is specifically designed for classification problems\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# AdaBoost Classification\n", - "from pandas import read_csv\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.ensemble import AdaBoostClassifier\n", - "\n", - "\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "\n", - "num_trees = 39\n", - "seed=10\n", - "\n", - "kfold = KFold(n_splits=10, random_state=seed)\n", - "\n", - "model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "\n", - "print(results.mean())\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_ada = pd.DataFrame(output)\n", - "report_ada.columns = ['CLASS']\n", - "report_ada.index.name = \"Index\"\n", - "report_ada['CLASS']=report_ada['CLASS'].map({0.0:False, 1.0:True})\n", - "report_ada.to_csv(\"report_ada.csv\")\n", - "\n", - "\n", - "print(report_ada['CLASS'].unique())\n", - "print('False: ',report_ada.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_ada.groupby('CLASS').size()[1].sum())\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bagging Algorithms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Bagging is used with decision trees where it 
significantly raises the stability of models in the reduction of variance and improving accuracy, which eliminates the challenge of overfitting." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bagged Decision Trees" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Bagged Decision Trees for Classification\n", - "from pandas import read_csv\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.ensemble import BaggingClassifier\n", - "from sklearn.tree import DecisionTreeClassifier\n", - "\n", - "#split the data in portions\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "seed = 7 #duplication\n", - "\n", - "#split according to cross validation\n", - "kfold = KFold(n_splits=10, random_state=seed)\n", - "\n", - "#initialize the model\n", - "cart = DecisionTreeClassifier()\n", - "\n", - "#bagging\n", - "num_trees = 250\n", - "\n", - "#model\n", - "model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)\n", - "\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_bag = pd.DataFrame(output)\n", - "report_bag.columns = ['CLASS']\n", - "report_bag.index.name = \"Index\"\n", - "report_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\n", - "report_bag.to_csv(\"report_bag.csv\")\n", - "\n", - "\n", - "print(report_bag['CLASS'].unique())\n", - "print('False: ',report_bag.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_bag.groupby('CLASS').size()[1].sum())\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Random Forest\n", - "#### The random 
forest combines hundreds or thousands of decision trees, trains each one on a slightly different set of the observations, splitting nodes in each tree considering a limited number of the features. The final predictions of the random forest are made by averaging the predictions of each individual tree.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Random Forest Classification\n", - "from pandas import read_csv\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "\n", - "\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "\n", - "num_trees = 1000\n", - "\n", - "max_features = 3\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_rf = pd.DataFrame(output)\n", - "report_rf.columns = ['CLASS']\n", - "report_rf.index.name = \"Index\"\n", - "report_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\n", - "report_rf.to_csv(\"report_rf.csv\")\n", - "\n", - "\n", - "print(report_rf['CLASS'].unique())\n", - "print('False: ',report_rf.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_rf.groupby('CLASS').size()[1].sum())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra Trees" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.ensemble import ExtraTreesClassifier\n", - "\n", - 
"X = array[:,0:11]\n", - "Y = array[:,11]\n", - "\n", - "num_trees = 100\n", - "max_features = 7\n", - "\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "\n", - "model = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features)\n", - "\n", - "results = cross_val_score(model, X, Y, cv=kfold)\n", - "\n", - "print(results.mean())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Voting Ensemble" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Voting Ensemble for Classification\n", - "from pandas import read_csv\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.tree import DecisionTreeClassifier\n", - "from xgboost import XGBClassifier\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.svm import SVC\n", - "from sklearn.ensemble import VotingClassifier\n", - "\n", - "\n", - "X = array[:,0:11]\n", - "Y = array[:,11]\n", - "kfold = KFold(n_splits=10, random_state=7)\n", - "\n", - "# create the sub models\n", - "estimators = []\n", - "model1 = LogisticRegression()\n", - "estimators.append(('logistic', model1))\n", - "\n", - "model2 = DecisionTreeClassifier()\n", - "estimators.append(('cart', model2))\n", - "\n", - "model3 = SVC()\n", - "estimators.append(('svm', model3))\n", - "\n", - "model4 = XGBClassifier()\n", - "estimators.append(('xgb', model4))\n", - "\n", - "model5 = RandomForestClassifier()\n", - "estimators.append(('rfc', model5))\n", - "\n", - "# create the ensemble model\n", - "ensemble = VotingClassifier(estimators)\n", - "results = cross_val_score(ensemble, X, Y, cv=kfold)\n", - "print(results.mean())\n", - "\n", - "\n", - "model.fit(X,Y)\n", - "output = model.predict(test.values)\n", - "\n", - "from sklearn.metrics import matthews_corrcoef\n", - "mcc = 
matthews_corrcoef(model.predict(X),Y)\n", - "print('MCC:',mcc)\n", - " \n", - "report_v = pd.DataFrame(output)\n", - "report_v.columns = ['CLASS']\n", - "report_v.index.name = \"Index\"\n", - "report_v['CLASS']=report_v['CLASS'].map({0.0:False, 1.0:True})\n", - "report_v.to_csv(\"report_v.csv\")\n", - "\n", - "\n", - "print(report_v['CLASS'].unique())\n", - "print('False: ',report_v.groupby('CLASS').size()[0].sum())\n", - "print('True: ',report_v.groupby('CLASS').size()[1].sum())\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## comparing the algorithms" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# prepare models and add them to a list\n", - "from matplotlib import pyplot\n", - "\n", - "models = []\n", - "models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\n", - "models.append(('LDA', LinearDiscriminantAnalysis()))\n", - "models.append(('KNN', KNeighborsClassifier()))\n", - "models.append(('CART', DecisionTreeClassifier()))\n", - "models.append(('NB', GaussianNB()))\n", - "models.append(('SVM', SVC(gamma='auto')))\n", - "models.append(('ETC', ExtraTreesClassifier()))\n", - "models.append(('RFC', RandomForestClassifier()))\n", - "\n", - "# evaluate each model in turn\n", - "results = []\n", - "names = []\n", - "scoring = 'accuracy'\n", - "\n", - "for name, model in models:\n", - " kfold = KFold(n_splits=10, random_state=7)\n", - " cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n", - " results.append(cv_results)\n", - " names.append(name)\n", - " msg = (name, cv_results.mean(), cv_results.std())\n", - " print(msg)\n", - "\n", - "# boxplot algorithm comparison\n", - "fig = pyplot.figure()\n", - "fig.suptitle('Algorithm Comparison')\n", - "ax = fig.add_subplot(111)\n", - "pyplot.boxplot(results)\n", - "ax.set_xticklabels(names)\n", - "pyplot.show()" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} +{"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n \n\n# Any results you write to the current directory are saved as output.","execution_count":1,"outputs":[{"output_type":"stream","text":"/kaggle/input/ace-class-assignment/AMP_TrainSet.csv\n/kaggle/input/ace-class-assignment/Test.csv\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":" # KAYAGA NASHIM MILVAT 2019/HD07/28450U"},{"metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","trusted":true},"cell_type":"code","source":"import warnings\nwarnings.filterwarnings('ignore')","execution_count":2,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test = pd.read_csv(\"../input/ace-class-assignment/Test.csv\")\ntest","execution_count":3,"outputs":[{"output_type":"execute_result","execution_count":3,"data":{"text/plain":" FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n0 4.0 3.704 0.873 73.519 \n1 4.0 4.444 0.892 62.444 \n2 2.0 0.000 0.901 47.000 \n3 4.5 0.000 0.869 69.222 \n4 -4.0 21.591 1.061 71.682 \n.. ... ... ... ... \n753 -1.5 16.000 1.100 82.820 \n754 -1.0 18.182 1.085 73.455 \n755 -1.0 19.048 1.108 82.190 \n756 -1.0 7.143 0.955 76.786 \n757 -7.0 17.143 1.078 84.186 \n\n FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n0 0.987 -4.833 0 0.382 \n1 0.931 -0.584 0 0.320 \n2 1.039 -5.664 0 0.164 \n3 0.982 -5.423 0 2.010 \n4 0.976 -2.002 0 2.758 \n.. ... ... ... ... 
\n753 0.991 -1.987 0 15.185 \n754 1.027 -0.745 0 16.550 \n755 1.033 -1.789 0 16.112 \n756 1.023 1.141 0 20.630 \n757 1.009 -0.066 0 17.168 \n\n AS_DAYM780201 AS_FUKS010112 CT_RACS820104 \n0 74.556 7.225 1.234 \n1 56.056 4.942 1.853 \n2 47.000 5.969 1.174 \n3 69.222 5.462 1.138 \n4 66.000 5.582 1.453 \n.. ... ... ... \n753 85.333 7.053 1.325 \n754 74.667 6.729 1.132 \n755 79.667 6.036 1.219 \n756 76.786 5.669 1.111 \n757 76.611 6.688 1.305 \n\n[758 rows x 11 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104
04.03.7040.87373.5190.987-4.83300.38274.5567.2251.234
14.04.4440.89262.4440.931-0.58400.32056.0564.9421.853
22.00.0000.90147.0001.039-5.66400.16447.0005.9691.174
34.50.0000.86969.2220.982-5.42302.01069.2225.4621.138
4-4.021.5911.06171.6820.976-2.00202.75866.0005.5821.453
....................................
753-1.516.0001.10082.8200.991-1.987015.18585.3337.0531.325
754-1.018.1821.08573.4551.027-0.745016.55074.6676.7291.132
755-1.019.0481.10882.1901.033-1.789016.11279.6676.0361.219
756-1.07.1430.95576.7861.0231.141020.63076.7865.6691.111
757-7.017.1431.07884.1861.009-0.066017.16876.6116.6881.305
\n

758 rows × 11 columns

\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"test.shape","execution_count":4,"outputs":[{"output_type":"execute_result","execution_count":4,"data":{"text/plain":"(758, 11)"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"#read in the data\ndata = pd.read_csv(\"../input/ace-class-assignment/AMP_TrainSet.csv\")\ndata.head(5)","execution_count":5,"outputs":[{"output_type":"execute_result","execution_count":5,"data":{"text/plain":" FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\n0 5.0 0.000 0.951 74.842 \n1 4.0 5.405 0.931 71.595 \n2 5.5 5.405 0.873 73.595 \n3 5.0 4.167 0.895 66.250 \n4 7.5 8.537 0.932 64.720 \n\n FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\n0 0.975 -3.663 0 0.282 \n1 0.957 -4.011 1 0.600 \n2 0.961 -2.512 0 0.593 \n3 0.999 -1.362 0 0.614 \n4 0.979 -2.091 0 0.616 \n\n AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \n0 73.444 5.661 1.041 1 \n1 68.222 6.537 1.453 1 \n2 69.444 4.934 1.722 1 \n3 67.222 4.316 1.382 1 \n4 72.944 4.540 1.539 1 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
05.00.0000.95174.8420.975-3.66300.28273.4445.6611.0411
14.05.4050.93171.5950.957-4.01110.60068.2226.5371.4531
25.55.4050.87373.5950.961-2.51200.59369.4444.9341.7221
35.04.1670.89566.2500.999-1.36200.61467.2224.3161.3821
47.58.5370.93264.7200.979-2.09100.61672.9444.5401.5391
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Analyze data by describing\n\n#### This step helped me know which features are in my dataset, are they categorical or numerical.\n#### How many rows and columns does the dataset have\n#### The data types for the various features\n#### Checked whether the dataset has null or missing values"},{"metadata":{"trusted":true},"cell_type":"code","source":"#Check the dimensions to the number of rows and columns\ndata.shape","execution_count":6,"outputs":[{"output_type":"execute_result","execution_count":6,"data":{"text/plain":"(3038, 12)"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.columns","execution_count":7,"outputs":[{"output_type":"execute_result","execution_count":7,"data":{"text/plain":"Index(['FULL_Charge', 'FULL_AcidicMolPerc', 'FULL_AURR980107',\n 'FULL_DAYM780201', 'FULL_GEOR030101', 'FULL_OOBM850104', 'NT_EFC195',\n 'AS_MeanAmphiMoment', 'AS_DAYM780201', 'AS_FUKS010112', 'CT_RACS820104',\n 'CLASS'],\n dtype='object')"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"data.dtypes","execution_count":8,"outputs":[{"output_type":"execute_result","execution_count":8,"data":{"text/plain":"FULL_Charge float64\nFULL_AcidicMolPerc float64\nFULL_AURR980107 float64\nFULL_DAYM780201 float64\nFULL_GEOR030101 float64\nFULL_OOBM850104 float64\nNT_EFC195 int64\nAS_MeanAmphiMoment float64\nAS_DAYM780201 float64\nAS_FUKS010112 float64\nCT_RACS820104 float64\nCLASS int64\ndtype: object"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"#Generate descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values\ndata.describe()","execution_count":9,"outputs":[{"output_type":"execute_result","execution_count":9,"data":{"text/plain":" FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 FULL_DAYM780201 \\\ncount 
3038.000000 3038.000000 3038.000000 3038.000000 \nmean 2.060237 8.521520 0.971410 73.668760 \nstd 3.819929 7.586652 0.107413 8.527489 \nmin -16.000000 0.000000 0.684000 42.750000 \n25% 0.000000 2.516000 0.895000 68.294000 \n50% 2.000000 7.143000 0.963000 74.059500 \n75% 4.000000 13.158000 1.041000 79.343750 \nmax 30.000000 46.667000 1.451000 101.682000 \n\n FULL_GEOR030101 FULL_OOBM850104 NT_EFC195 AS_MeanAmphiMoment \\\ncount 3038.000000 3038.000000 3038.000000 3038.000000 \nmean 0.994007 -2.432927 0.088545 15.683233 \nstd 0.031333 1.707223 0.284133 11.575665 \nmin 0.866000 -10.432000 0.000000 0.041000 \n25% 0.974000 -3.606000 0.000000 5.587500 \n50% 0.994000 -2.296500 0.000000 14.988500 \n75% 1.011000 -1.283250 0.000000 26.807750 \nmax 1.196000 3.576000 1.000000 51.280000 \n\n AS_DAYM780201 AS_FUKS010112 CT_RACS820104 CLASS \ncount 3038.000000 3038.000000 3038.000000 3038.000000 \nmean 73.650828 5.911361 1.235255 0.500000 \nstd 9.166092 0.693689 0.210012 0.500082 \nmin 42.778000 3.533000 0.785000 0.000000 \n25% 67.556000 5.459250 1.082000 0.000000 \n50% 73.697000 5.925500 1.184000 0.500000 \n75% 79.778000 6.382000 1.351000 1.000000 \nmax 103.167000 8.662000 2.192000 1.000000 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
count3038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.0000003038.000000
mean2.0602378.5215200.97141073.6687600.994007-2.4329270.08854515.68323373.6508285.9113611.2352550.500000
std3.8199297.5866520.1074138.5274890.0313331.7072230.28413311.5756659.1660920.6936890.2100120.500082
min-16.0000000.0000000.68400042.7500000.866000-10.4320000.0000000.04100042.7780003.5330000.7850000.000000
25%0.0000002.5160000.89500068.2940000.974000-3.6060000.0000005.58750067.5560005.4592501.0820000.000000
50%2.0000007.1430000.96300074.0595000.994000-2.2965000.00000014.98850073.6970005.9255001.1840000.500000
75%4.00000013.1580001.04100079.3437501.011000-1.2832500.00000026.80775079.7780006.3820001.3510001.000000
max30.00000046.6670001.451000101.6820001.1960003.5760001.00000051.280000103.1670008.6620002.1920001.000000
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"#number of null values in each column\ndata.isnull().sum()\n#since my data has no null values then its good to go","execution_count":10,"outputs":[{"output_type":"execute_result","execution_count":10,"data":{"text/plain":"FULL_Charge 0\nFULL_AcidicMolPerc 0\nFULL_AURR980107 0\nFULL_DAYM780201 0\nFULL_GEOR030101 0\nFULL_OOBM850104 0\nNT_EFC195 0\nAS_MeanAmphiMoment 0\nAS_DAYM780201 0\nAS_FUKS010112 0\nCT_RACS820104 0\nCLASS 0\ndtype: int64"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"#### needed to know how balanced the class values are"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.groupby('CLASS').size().plot(kind='bar')","execution_count":11,"outputs":[{"output_type":"execute_result","execution_count":11,"data":{"text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEGCAYAAABsLkJ6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAESVJREFUeJzt3XuMXGd5x/Hvj5iEW8G5LBBsh03BBQJtIWxDWqqKYsgNhFOVVIlQY6Vp3aqh5VIEAf6ICkIFUZESiaZyicGoKJByUSwaSq0EhKo2IRsIuRKyDRAvDslSh7Ql5RJ4+se8bgZ77bV31jvB7/cjjeac5zxnzjvSeH8+t5lUFZKk/jxq3AOQJI2HASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnq1IpxD2BfjjnmmJqcnBz3MCTp58oNN9zw3aqaWKjvER0Ak5OTTE9Pj3sYkvRzJcm39qfPQ0CS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkTj2ibwT7eTF54T+NewiHlG+++xXjHsIhxc/n0jnUPpvuAUhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwsGQJLNSe5Lcss8y96UpJIc0+aT5JIkM0luSnLiUO+GJHe2x4alfRuSpAO1P3sAHwZO272YZA3wcuDuofLpwNr22Ahc2nqPAi4CXgScBFyU5MhRBi5JGs2CAVBVXwR2zrPoYuDNQA3V1gMfqYFrgZVJjgVOBbZV1c6quh/YxjyhIklaPos6B5DkVcC3q+qruy1aBWwfmp9ttb3VJUljcsA/CJPkccDbgVPmWzxPrfZRn+/1NzI4fMRxxx13oMOTJO2nxewBPAM4Hvhqkm8Cq4EvJ3kqg//ZrxnqXQ3s2Ed9D1W1qaqmqmpqYmJiEcOTJO2PAw6Aqrq5qp5cVZNVNcngj/uJVfUdYCtwbrsa6GTggaq6B/gccEqSI9vJ31NaTZI0JvtzGejlwL8Dz0oym+T8fbRfBdwFzAB/D/wpQFXtBN4JXN8e72g1SdKYLHgOoKrOWWD55NB0ARfspW8zsPkAxydJOki8E1iSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnq1P78KPzmJPcluWWo9t4kX0tyU5JPJ1k5tOytSWaS3JHk1KH6aa02k+TCpX8rkqQDsT97AB8GTtuttg14XlX9CvB14K0ASU4Azgae29b52ySHJTkM+ABwOnACcE7rlSSNyYIBUFVfBHbuVvuXqnqozV4LrG7T64GPVdUPq+obwAxwUnvMVNVdVfUj4GOtV5I0JktxDuAPgM+26VXA9qFls622t/oekmxMMp1kem5ubgmGJ0maz0gBkOTtwEPAR3eV5mmrfdT3LFZtqqqpqpqamJgYZXiSpH1YsdgVk2wAXgmsq6pdf8xngTVDbauBHW16b3VJ0hgsag8gyWnAW4BXVdWDQ4u2AmcnOSLJ8cBa4EvA9cDaJMcnOZzBieKtow1dkjSKBfcAklwOvAQ4JskscBGDq36OALYlAbi2qv6kqm5NcgVwG4NDQxdU1U/a67wW+BxwGLC5qm49CO9HkrSfFgyAqjpnnvJl++h/F/CueepXAVcd0OgkSQeNdwJLUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUggGQZHOS+5LcMlQ7Ksm2JHe25yNbPUkuSTKT5KYkJw6ts6H135lkw8F5O5Kk/bU/ewAfBk7brXYhcHVVrQWubvMApwNr22MjcCkMAoPBj8m/CDgJu
GhXaEiSxmPBAKiqLwI7dyuvB7a06S3AmUP1j9TAtcDKJMcCpwLbqmpnVd0PbGPPUJEkLaPFngN4SlXdA9Cen9zqq4DtQ32zrba3+h6SbEwynWR6bm5ukcOTJC1kqU8CZ55a7aO+Z7FqU1VNVdXUxMTEkg5OkvSwxQbAve3QDu35vlafBdYM9a0GduyjLkkak8UGwFZg15U8G4Arh+rntquBTgYeaIeIPgeckuTIdvL3lFaTJI3JioUaklwOvAQ4Jsksg6t53g1ckeR84G7grNZ+FXAGMAM8CJwHUFU7k7wTuL71vaOqdj+xLElaRgsGQFWds5dF6+bpLeCCvbzOZmDzAY1OknTQeCewJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROjRQASd6Q5NYktyS5PMljkhyf5Lokdyb5eJLDW+8RbX6mLZ9cijcgSVqcRQdAklXAnwNTVfU84DDgbOA9wMVVtRa4Hzi/rXI+cH9VPRO4uPVJksZk1ENAK4DHJlkBPA64B3gp8Im2fAtwZpte3+Zpy9clyYjblyQt0qIDoKq+Dfw1cDeDP/wPADcA36uqh1rbLLCqTa8Ctrd1H2r9R+/+ukk2JplOMj03N7fY4UmSFjDKIaAjGfyv/njgacDjgdPnaa1dq+xj2cOFqk1VNVVVUxMTE4sdniRpAaMcAnoZ8I2qmquqHwOfAn4DWNkOCQGsBna06VlgDUBb/iRg5wjblySNYJQAuBs4Ocnj2rH8dcBtwOeBV7eeDcCVbXprm6ctv6aq9tgDkCQtj1HOAVzH4GTul4Gb22ttAt4CvDHJDINj/Je1VS4Djm71NwIXjjBuSdKIVizcsndVdRFw0W7lu4CT5un9AXDWKNuTJC0d7wSWpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktSpkQIgycokn0jytSS3J/n1JEcl2ZbkzvZ8ZOtNkkuSzCS5KcmJS/MWJEmLMeoewPuBf66qZwO/CtzO4Mfer66qtcDVPPzj76cDa9tjI3DpiNuWJI1g0QGQ5InAbwGXAVTVj6rqe8B6YEtr2wKc2abXAx+pgWuBlUmOXfTIJUkjGWUP4BeBOeBDSb6S5INJHg88paruAWjPT279q4DtQ+vPtpokaQxGCYAVwInApVX1AuD7PHy4Zz6Zp1Z7NCUbk0wnmZ6bmxtheJKkfRklAGaB2aq6rs1/gkEg3Lvr0E57vm+of83Q+quBHbu/aFVtqqqpqpqamJgYYXiSpH1ZdABU1XeA7Ume1UrrgNuArcCGVtsAXNmmtwLntquBTgYe2HWoSJK0/FaMuP6fAR9NcjhwF3Aeg1C5Isn5wN3AWa33KuAMYAZ4sPVKksZkpACoqhuBqXkWrZunt4ALRtmeJGnpeCewJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1KmRAyDJYUm+kuQzbf74JNcluTPJx9sPxpPkiDY/05ZPjrptSdLiLcUewOuA24fm3wNcXFVrgfuB81v9fOD+qnomcHHrkySNyUgBkGQ18Argg20+wEuBT7SWLcCZbXp9m6ctX9f6JUljMOoewN8AbwZ+2uaPBr5XVQ+1+VlgVZteBWwHaMsfaP0/I8nGJNNJpufm5kYcniRpbxYdAEleCdxXVTcMl+dprf1Y9nChalNVTVXV1MTExGKHJ0lawIoR1n0x8KokZwCPAZ7IYI9gZZIV7X/5q4EdrX8WWAPMJlkBPAnYOcL2JUkjWPQeQFW9tapWV9UkcDZwTVW9Bvg88OrWtgG4sk1vb
fO05ddU1R57AJKk5XEw7gN4C/DGJDMMjvFf1uqXAUe3+huBCw/CtiVJ+2mUQ0D/r6q+AHyhTd8FnDRPzw+As5Zie5Kk0XknsCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkTi06AJKsSfL5JLcnuTXJ61r9qCTbktzZno9s9SS5JMlMkpuSnLhUb0KSdOBG2QN4CPiLqnoOcDJwQZITGPzY+9VVtRa4mod//P10YG17bAQuHWHbkqQRLToAquqeqvpym/5v4HZgFbAe2NLatgBntun1wEdq4FpgZZJjFz1ySdJIluQcQJJJ4AXAdcBTquoeGIQE8OTWtgrYPrTabKvt/lobk0wnmZ6bm1uK4UmS5jFyACR5AvBJ4PVV9V/7ap2nVnsUqjZV1VRVTU1MTIw6PEnSXowUAEkezeCP/0er6lOtfO+uQzvt+b5WnwXWDK2+GtgxyvYlSYs3ylVAAS4Dbq+q9w0t2gpsaNMbgCuH6ue2q4FOBh7YdahIkrT8Voyw7ouB3wduTnJjq70NeDdwRZLzgbuBs9qyq4AzgBngQeC8EbYtSRrRogOgqv6V+Y/rA6ybp7+ACxa7PUnS0vJOYEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnVr2AEhyWpI7kswkuXC5ty9JGljWAEhyGPAB4HTgBOCcJCcs5xgkSQPLvQdwEjBTVXdV1Y+AjwHrl3kMkiRgxTJvbxWwfWh+FnjRcEOSjcDGNvs/Se5YprH14Bjgu+MexELynnGPQGPyiP98/hx9Np++P03LHQCZp1Y/M1O1Cdi0PMPpS5Lpqpoa9zik+fj5XH7LfQhoFlgzNL8a2LHMY5AksfwBcD2wNsnxSQ4Hzga2LvMYJEks8yGgqnooyWuBzwGHAZur6tblHEPnPLSmRzI/n8ssVbVwlyTpkOOdwJLUKQNAkjplAEhSp5b7PgAtoyTPZnCn9SoG91vsALZW1e1jHZikRwT3AA5RSd7C4Ks2AnyJwSW4AS73S/gkgVcBHbKSfB14blX9eLf64cCtVbV2PCOT9i3JeVX1oXGPowfuARy6fgo8bZ76sW2Z9Ej1l+MeQC88B3Doej1wdZI7efgL+I4Dngm8dmyjkoAkN+1tEfCU5RxLzzwEdAhL8igGX8G9isE/rFng+qr6yVgHpu4luRc4Fbh/90XAv1XVfHuvWmLuARzCquqnwLXjHoc0j88AT6iqG3dfkOQLyz+cPrkHIEmd8iSwJHXKAJCkThkA6lqSpyb5WJL/SHJbkquS/FKSW/bSvyLJd5P81W71Vyb5SpKvttf541Z/VpIvJLkxye1J/MpjPWJ4EljdShLg08CWqjq71Z7Pvi9DPAW4A/i9JG+rqkryaAbfZX9SVc0mOQKYbP2XABdX1ZXt9X/54Lwb6cC5B6Ce/Tbw46r6u12FdlXK9r2vwjnA+4G7gZNb7RcY/GfqP9tr/LCq7mjLjmVw+e2u1795yUYvjcgAUM+eB9ywv81JHgusY3AJ4+UMwoCq2sngp02/leTyJK9p92AAXAxck+SzSd6QZOWSvgNpBAaAtP9eCXy+qh4EPgn8TpLDAKrqDxmEw5eANwGbW/1DwHOAfwReAlzbDhFJY2cAqGe3Ai88gP5zgJcl+SaDPYejGRxGAgaHd6rqYuDlwO8O1XdU1eaqWg88xGDPQxo7A0A9uwY4Iskf7Sok+TXg6bs3Jnki8JvAcVU1WVWTwAXAOUmekOQlQ+3PB77V1jutnSQmyVMZhMa3D87bkQ6MdwKra0meBvwNgz2BHwDfZPBFercB9w61vh944a6rhdq6RzG4IuiZDM4JPAP4X+D7wOuqajrJ+4BXtNcGeG9V/cPBf
E/S/jIAJKlTHgKSpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlT/wcIGF/zVTOjcwAAAABJRU5ErkJggg==\n"},"metadata":{"needs_background":"light"}}]},{"metadata":{},"cell_type":"markdown","source":"#### It's a good idea to review all the pairwise correlations of the attributes in the dataset, because some machine learning algorithms, such as linear and logistic regression, can suffer from poor performance if there are highly correlated attributes in the dataset."},{"metadata":{"trusted":true},"cell_type":"code","source":"data.corr(method='pearson')","execution_count":12,"outputs":[{"output_type":"execute_result","execution_count":12,"data":{"text/plain":" FULL_Charge FULL_AcidicMolPerc FULL_AURR980107 \\\nFULL_Charge 1.000000 -0.612996 -0.490977 \nFULL_AcidicMolPerc -0.612996 1.000000 0.794796 \nFULL_AURR980107 -0.490977 0.794796 1.000000 \nFULL_DAYM780201 -0.434603 0.541481 0.548253 \nFULL_GEOR030101 -0.058725 0.115201 0.346139 \nFULL_OOBM850104 -0.283758 0.513344 0.462712 \nNT_EFC195 0.088068 -0.143168 -0.169540 \nAS_MeanAmphiMoment 0.355477 -0.431590 -0.426097 \nAS_DAYM780201 -0.365374 0.449621 0.456260 \nAS_FUKS010112 -0.090570 0.002334 0.032958 \nCT_RACS820104 0.232929 -0.213543 -0.403599 \nCLASS 0.534602 -0.598816 -0.584111 \n\n FULL_DAYM780201 FULL_GEOR030101 FULL_OOBM850104 \\\nFULL_Charge -0.434603 -0.058725 -0.283758 \nFULL_AcidicMolPerc 0.541481 0.115201 0.513344 \nFULL_AURR980107 0.548253 0.346139 0.462712 \nFULL_DAYM780201 1.000000 0.010118 0.334778 \nFULL_GEOR030101 0.010118 1.000000 0.319157 \nFULL_OOBM850104 0.334778 0.319157 1.000000 \nNT_EFC195 -0.090058 -0.230417 -0.230561 \nAS_MeanAmphiMoment -0.408793 -0.160269 -0.336297 \nAS_DAYM780201 0.894191 -0.029085 0.275640 \nAS_FUKS010112 0.055915 0.040480 -0.452769 \nCT_RACS820104 -0.326792 -0.151935 0.155304 \nCLASS -0.554838 -0.260470 -0.453287 \n\n NT_EFC195 AS_MeanAmphiMoment AS_DAYM780201 \\\nFULL_Charge 0.088068 0.355477 -0.365374 \nFULL_AcidicMolPerc -0.143168 -0.431590 0.449621 \nFULL_AURR980107 -0.169540 -0.426097 
0.456260 \nFULL_DAYM780201 -0.090058 -0.408793 0.894191 \nFULL_GEOR030101 -0.230417 -0.160269 -0.029085 \nFULL_OOBM850104 -0.230561 -0.336297 0.275640 \nNT_EFC195 1.000000 0.178683 -0.036844 \nAS_MeanAmphiMoment 0.178683 1.000000 -0.322378 \nAS_DAYM780201 -0.036844 -0.322378 1.000000 \nAS_FUKS010112 0.145924 0.025580 0.045562 \nCT_RACS820104 0.080898 0.171524 -0.256060 \nCLASS 0.260702 0.693552 -0.437168 \n\n AS_FUKS010112 CT_RACS820104 CLASS \nFULL_Charge -0.090570 0.232929 0.534602 \nFULL_AcidicMolPerc 0.002334 -0.213543 -0.598816 \nFULL_AURR980107 0.032958 -0.403599 -0.584111 \nFULL_DAYM780201 0.055915 -0.326792 -0.554838 \nFULL_GEOR030101 0.040480 -0.151935 -0.260470 \nFULL_OOBM850104 -0.452769 0.155304 -0.453287 \nNT_EFC195 0.145924 0.080898 0.260702 \nAS_MeanAmphiMoment 0.025580 0.171524 0.693552 \nAS_DAYM780201 0.045562 -0.256060 -0.437168 \nAS_FUKS010112 1.000000 -0.445284 0.033432 \nCT_RACS820104 -0.445284 1.000000 0.267652 \nCLASS 0.033432 0.267652 1.000000 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AcidicMolPercFULL_AURR980107FULL_DAYM780201FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_DAYM780201AS_FUKS010112CT_RACS820104CLASS
FULL_Charge1.000000-0.612996-0.490977-0.434603-0.058725-0.2837580.0880680.355477-0.365374-0.0905700.2329290.534602
FULL_AcidicMolPerc-0.6129961.0000000.7947960.5414810.1152010.513344-0.143168-0.4315900.4496210.002334-0.213543-0.598816
FULL_AURR980107-0.4909770.7947961.0000000.5482530.3461390.462712-0.169540-0.4260970.4562600.032958-0.403599-0.584111
FULL_DAYM780201-0.4346030.5414810.5482531.0000000.0101180.334778-0.090058-0.4087930.8941910.055915-0.326792-0.554838
FULL_GEOR030101-0.0587250.1152010.3461390.0101181.0000000.319157-0.230417-0.160269-0.0290850.040480-0.151935-0.260470
FULL_OOBM850104-0.2837580.5133440.4627120.3347780.3191571.000000-0.230561-0.3362970.275640-0.4527690.155304-0.453287
NT_EFC1950.088068-0.143168-0.169540-0.090058-0.230417-0.2305611.0000000.178683-0.0368440.1459240.0808980.260702
AS_MeanAmphiMoment0.355477-0.431590-0.426097-0.408793-0.160269-0.3362970.1786831.000000-0.3223780.0255800.1715240.693552
AS_DAYM780201-0.3653740.4496210.4562600.894191-0.0290850.275640-0.036844-0.3223781.0000000.045562-0.256060-0.437168
AS_FUKS010112-0.0905700.0023340.0329580.0559150.040480-0.4527690.1459240.0255800.0455621.000000-0.4452840.033432
CT_RACS8201040.232929-0.213543-0.403599-0.326792-0.1519350.1553040.0808980.171524-0.256060-0.4452841.0000000.267652
CLASS0.534602-0.598816-0.584111-0.554838-0.260470-0.4532870.2607020.693552-0.4371680.0334320.2676521.000000
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":"#### Heat map of the pairwise correlations in the data; plots like this show the interactions between multiple variables in the dataset.\n#### Correlation gives an indication of how related the changes are between two variables. If two variables change in the same direction, they are positively correlated. If they change in opposite directions together (one goes up, one goes down), then they are negatively correlated. "},{"metadata":{"trusted":true},"cell_type":"code","source":"plt.figure(figsize=(8,8))\nsns.heatmap(data.corr(method='pearson'))\n","execution_count":13,"outputs":[{"output_type":"execute_result","execution_count":13,"data":{"text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAjsAAAJCCAYAAAAm+wZSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xu8VXWd//HXW0C8YCqaeQHFDC2vkKQ2aoOXFJ0ptCw5FcqMSlNa/iornRwzy9+o5a9y1BwyRRtHpLxhg6KRjlpeQEUumnpU0iOEKV5SSYXz+f2xvlsXm73POXD22Xuz1/vZYz3O3t/1Xd/PWhvifPx8v2svRQRmZmZmrWqdRp+AmZmZWV9ysmNmZmYtzcmOmZmZtTQnO2ZmZtbSnOyYmZlZS3OyY2ZmZi3NyY6ZmZnVjaTLJD0vaX6V/ZJ0gaR2SXMlfbi3MZ3smJmZWT1NBsZ0sf8wYHjaJgI/621AJztmZmZWNxFxJ7C0iy5jgSsjcy+wiaStehPTyY6ZmZk1k22AZ3PvO1LbGuvfq9OxlvP2C0/V/fkh62+9f71DArD1oMENibvRgA0aEnf8esMbEveeeLnuMf/5rffUPSbAPh9c1JC4n3qiMf/d+vH+vfqP7TVyVL9X6h4TYNhn1m1I3EHnXqe+Grsv/r1f9707fJFs6qlkUkRMWs1hKl1zr87VyY6ZmZnVREpsVje5KdcBDM29HwL06r8knOyYmZkVUeeKRp9BNdOAkyRNAfYGXomIxb0Z0MmOmZmZ1Y2kq4HRwOaSOoDvAgMAIuISYDpwONAOvAH8U29jOtkxMzMrouhsTNiItm72B3BiLWP6biwzMzNraa7smJmZFVFnYyo7jeBkx8zMrICiQdNYjeBpLDMzM2tpruyYmZkVUYGmsVzZMTMzs5bmyo6ZmVkRFWjNjpMdMzOzImreb1CuOU9jmZmZWUtzZcfMzKyICjSNVcjKjqQVkubktmGSJki6sKzfHZJGpdcLJW1etn+VY7qIOUjSf0p6UtICSXdK2jvFnl+7qzMzM7O8olZ2lkXEiHyDpL6OeSnwNDA8IjolvR/4ELCkN4NK6h8Ry2txgmZmViAFuvW8qMlOXUnagewx9Z+P9JWVEfEU8JSkYUA/ST8H/g54DhgbEcsknQBMBNYle/rr+Ih4Q9JkYCkwEnhQ0jnAfwObAbOAMcCeEfGCpC8AX01j3Ad8OSKKsyrNzMwq8jcot771c1NY19ch3i7AnC6SjOHARRGxC/Ay8OnUfl1EfCQi9gAeBY7LHbMjcHBEfAP4LvC7iPgwcD2wLYCkDwFHA/umStYK4PPlwSVNlDRb0uxLr7y6t9dqZmbWVIpa2VllGguIKn2rtdfS0xExJ71+ABiWXu8q6QfAJsAgYEbumF/lkqf9gCMBIuIWSS+l9oOAPYFZaZpufeD58uARMQmYBPD2C0/V43rNzKzRPI1VSC8Cm5a1DQZeqMHYC4A9JK0TleuGb+ZeryBLSgAmA0dExMOSJgCjc/1ez72utuBIwBURcdqanLSZmVkrKOo0ViWzgH0lbQmQ7sIaCDzb24Ej4klgNvA9pRKLpOGSxnZz6EbAYkkDqDD9lHM38Nk07iG8m7TNBI6StEXaN1jSdmt+JWZm1jKis/Zbk3JlJ4mIJZJOBqZLWgd4DWgrq8TMlVR6PxWYC0yQdESuzz4R0VEhxPHA+UC7pDfIKknf7Oa0/o1sUfGfgHlkyU8l3wOulnQ08L/AYuCvaYHy6cCt6ZreBk5M45mZmRVCIZOdiBhUpf1G4MYq+4ZVGW5yD2O+CpxQZfeuuX4/yr3+GfCzCmNNKGt6BTg0IpZL+ihwQES8mfpeA1zTk3M0M7MCKdDjIgqZ7LSgbYGpqXrzFtWTKjMzs0wTTzvVmpOdGpN0H9lan7zxETGvr2JGxBNk37ljZmZmZZzs1FhE7N3oczAzM+tWgW49991YZmZm1tJc2
TEzMysir9kxMzOzluZpLDMzM7PW4MqOmZlZAVV/NnXrcWXHzMzMWporO2ZmZkXkBcpmZmbW0rxA2czMzKw1KCIafQ7WRPqvu03d/0IsW3RXvUMCsHzOrQ2Ju2LmbQ2Je/dlAxoSd9+21+se87s3bFj3mAAzlj3dkLh/OKTis4373JTbt6p7zHv7L6t7TICrFt/XkLhvvdmhvhr7bw/cUPN/79fb84g+O9/ecGXHzMzMWprX7JiZmRVRZ3FuPXeyY2ZmVkQFuhvL01hmZmbW0lzZMTMzKyLfem5mZmbWGlzZMTMzKyKv2TEzMzNrDa7smJmZFVGB1uw42TEzMyuiAiU7nsYyMzOzlubKjpmZWQFFFOcblF3ZMTMzs5bmZMfMzKyIOjtrv/WApDGSHpPULunUCvu3lXS7pIckzZV0eG8vte7JjqQVkubktmGSJki6sKzfHZJGpdcLJW1etn+VY7qJO1JSSDq0B33/RdIxFdqHSZqfXo+SdEE34yyUdFdZ25zSGF0cN1rSb9LrCZL+ko57RNIJ3Z2/mZlZt6Kz9ls3JPUDLgIOA3YG2iTtXNbtdGBqRIwExgEX9/ZSG7FmZ1lEjMg3SKpH3Dbg7vRzRlcdI+KS7gaLiNnA7B7E3UjS0Ih4VtKHenSmq7omIk6StAWwQNK0iFjS3UGS+kfE8jWMaWZmVmt7Ae0R8RSApCnAWOCRXJ8A3pNebwws6m3QQkxjKcumjgImAIdIWi+375hUJntY0i9T25mSTkmv90z77gFOzB2Xr74MknS5pHlprE/nwk8Fjk6v24Crc2OslzvuIUkHdHUdEfE88CSwnaQNJV0maVY6dmwac4KkX0m6Cbg1tX0rxXhY0jlr8hmamVmLacw01jbAs7n3Hakt70zgC5I6gOnAV3p7qY1IdtbPTWFdX6eY+wJPR8STwB3A4QCSdgG+AxwYEXsAJ1c49nLgqxHx0S7G/zfglYjYLSJ2B36X2/dr4FPp9SeAm3L7TgSIiN3IEqEr8olYOUnvB94PtKfz/l1EfAQ4APihpA1T148Cx0bEgZIOA44A9k7XeF6FcSdKmi1pdmfn611cppmZWXX53ydpm1jepcJhUfa+DZgcEUPIfl//UlKv8pWmmMZi1Qvtrn11tQFT0uspwHjgOuBA4NcR8QJARCzNHyRpY2CTiPjf1PRLsnnGcgeTzSuSxnkpt28p8JKkccCjwBu5ffsB/5GO+aOkPwE7Vhj/aEn7AW8CX4yIpZIOAT5ZqkAB6wHbpte35a7lYODyiHij0jWmtknAJID+625Tq8/czMyaWR88Gyv/+6SKDmBo7v0QVp2mOg4Yk8a7JxUBNgeeX9Pzapbv2XkR2LSsbTDwQm8HTouhPk2WGHyHLKvcTNJG6XVXv9y729/TfteQLciaUOG4nrgmIk6qcOynI+KxlRqlvYHXy/o5gTEzs5U15huUZwHDJW0PPEdWKPhcWZ9ngIOAyWmt63rAX3oTtFnW7MwC9pW0JWR3OgEDWXleb00dDDwcEUMjYlhEbAdcSza1MxP4rKTNUtzB+QMj4mXglVRVAfh8lRi3Au8kI5LKE7fryaaPyhdG31kaU9KOZJWZx+iZGcBX0nokJI3s4tz+WdIGqd/gKv3MzMz6VLpp5iSy32GPkt11tUDSWZI+mbp9AzhB0sNk61wnRESv/qO9KSo7EbFE0snA9DQv9xrQFrFSjW2upNL7qcBcYIKkI3J99omIjrLh28iSjbxrgS9FxC8lnQ38r6QVwEOsWn35J+AySW9Q/S6uHwAXpVvKVwDfI5smK13fX4FzYZU7zy4GLpE0D1hO9gf6Zg/vTvs+8BOyz0XAQuAfyztFxC2SRgCzJb1FttjrX3sSwMzMWlgfTGP1KGzEdLLfRfm2M3KvHyFba1sz6mWyZC2mEWt2li26q/tOfWD5nFsbEnfFzNsaEvfuywY0JO6+bfVf9P7dGzbsvlMfmLHs6YbE/cMhgxoSd8rtW9U95r39l9U9J
sBVi+9rSNy33uzos+9mWTbjwpr/e7/+oSfV5btkVldTVHbMzMyszgr01POWSnYk3Ue21idvfETMa8T5mJmZWeO1VLITEXs3+hzMzMzWCq7smJmZWUtr0ALlRmiWW8/NzMzM+oQrO2ZmZkVUoGksV3bMzMyspbmyY2ZmVkQFWrPjZMfMzKyIPI1lZmZm1hpc2TEzMyuiAk1jubJjZmZmLc2VHVvJ1oMG1z1mox7I2X/EIQ2J23n7zIbEHbHTnxsSt/OV+v8z8xYb1D0mgGjQMxAb9J+t6zfgOdIPv7mk/kGBddSCtYECrdlxsmNmZlZEBUp2WjBVNTMzM3uXKztmZmZFFA2Yh2wQV3bMzMyspbmyY2ZmVkRes2NmZmbWGlzZMTMzK6ICVXac7JiZmRWRv0HZzMzMrDW4smNmZlZEBZrGcmXHzMzMWporO2ZmZkVUoC8VdLJjZmZWRJ7GMjMzM2sNruyYmZkVkSs79SFphaQ5uW2YpAmSLizrd4ekUen1Qkmbl+1f5Zhu4o6UFJIOzbUNkzS/rN+Zkk5JrydLejqd58OSDio7v8dS+yxJI3L7jpY0V9ICSefl2reVdLukh9L+w3P7TpPUnsbMn+Nlkp6vcJ6DJd0m6Yn0c9PU/s3cZzs/fd6De/o5mZmZtYJGT2Mti4gRuW1hneK2AXenn6vjmxExAvg/wCVl+z4fEXsAFwM/BJC0WXp9UETsArwvlySdDkyNiJHAuHQcknZO73cBxgAXS+qXjpmc2sqdCsyMiOHAzPSeiPhh6bMFTgP+NyKWruY1m5lZK4rO2m9NqtHJTt1JEnAUMAE4RNJ6azDMPcA2Pdj3fuDxiPhLev9b4NPpdQDvSa83Bhal12OBKRHxZkQ8DbQDewFExJ1ApWRlLHBFen0FcESFPm3A1V1flpmZFUV0Rs23ZtXoZGf93DTL9XWKuS/wdEQ8CdwBHN5194rGADf0YF878ME0RdafLAkZmvadCXxBUgcwHfhKat8GeDY3XgfVE6uS90XEYoD0c4v8TkkbpPO6ttLBkiZKmi1p9mtvuvBjZmatpdELlJelKZa8aqlhrVLGNmBKej0FGA9c18O4P0zrbrYA9inrd5WkDYF+wIcBIuIlSV8CrgE6gT+QVXtK5zE5Is6X9FHgl5J2BdTNOayJTwC/rzaFFRGTgEkA2w7erXlTczMzqx0vUG6oF4FNy9oGAy/0duC09uXTwBmSFgL/ARwmaaMexv0m8AGy9TZXlPX9PLA98N/ARaXGiLgpIvaOiI8CjwFPpF3HAVNTn3uA9YDNySo5Q3PjDuHdKa5qlkjaKl3jVsDzZfvH4SksMzMrqGZMdmYB+0raEiDdhTWQlad21tTBwMMRMTQihkXEdmRTO0dExGvA4tIC4nTX0hiyhczviIhO4KfAOvk7pdK+t8kSoX0kfSiNs0X6uSnwZeDS1P0ZoBTrQ2TJzl+AacA4SQMlbQ8MB+7v5rqmAcem18cCN5Z2SNoY+Pt8m5mZmRcoN1BELAFOBqZLmgP8BGhLSUbJXEkdaft/qW1Crq1D0pAKw7cB5WuDrgU+l14fA5ye4v4O+F5a21N+jgH8APhWhX3LgPOBU1LTTyU9AvweOCciHk/t3wBOkPQwWdVlQmQWkFV8HgFuAU6MiBUAkq4mWwC9U7rG49JY5wAfl/QE8PH0vuRI4NaIeL3C52FmZtbyGrpmJyIGVWm/kSqViIgYVmW4yT2IN6FC2zSyyggR8QhwQE+OjYhrSQt+I2J02b7zc68r3t6eYu1bZd/ZwNkV2quN9SKpSlRh32R68NmYmVnBNPHdU7XW6AXKZmZm1ggFWqDcssmOpPvI1vrkjY+IeY04HzMzM2uMlk12ImLvRp+DmZlZ0ypQZafpFiibmZmZ1VLLVnbMzMysC+EFymZmZtbKPI1lZmZm1hqc7JiZmRVRZ9R+6wFJYyQ9Jqld0qlV+nxW0iOSFkj6795eqqexzMzMrC7SMyovIvu2/w5glqRp6
Yt2S32GA6cB+6YHam/R27hOdszMzIqoMc+y2gtoj4inACRNAcaSPSKp5ATgooh4CSAiyh9uvdo8jWVmZlZEjZnG2oaVH+zdkdrydgR2lPR7SfdKGtPbS3Vlx1ay0YAN6h5zxczb6h4ToPP2mQ2Ju+7Xzm1I3JuuOKMhcf9h+XN1j7l5NOaftgHq15C4632z4rKHPjfw1sl1jzm4/4Z1jwkwsP+AhsRd20iaCEzMNU2KiEn5LhUOK8+S+gPDgdHAEOAuSbtGxMtrel5OdszMzAoo+uDW85TYTOqiSwcwNPd+CLCoQp97I+Jt4GlJj5ElP7PW9Lw8jWVmZmb1MgsYLml7SesC44BpZX1uAA4AkLQ52bTWU70J6sqOmZlZEfXwVvFaiojlkk4CZgD9gMsiYoGks4DZETEt7TtE0iPACuCbEfFib+I62TEzM7O6iYjpwPSytjNyrwP4etpqwsmOmZlZETXm1vOGcLJjZmZWRA2YxmoUL1A2MzOzlubKjpmZWRH5qedmZmZmrcGVHTMzsyIq0JodJztmZmZFVKC7sTyNZWZmZi3NlR0zM7MiKtA0lis7ZmZm1tJc2TEzMyugvnjqebNqWGVH0gpJc3LbMEkTJF1Y1u8OSaPS64XpCaj5/asc00XMhZLmpe0RST+QNLCsz9ck/U3Sxun9FpKelrRlrs/Fkk6VNFpSSDout29kajslvb8md40LJc1J7QMkXZHO5VFJp+XGGCPpMUntkk7NtV+V2udLukzSgNQuSRek/nMlfTh3zC2SXpb0m558RmZmVhCdUfutSTVyGmtZRIzIbQvrFPeAiNgN2At4PzCpbH8b2SPojwSIiOeBc4EfAaREYj/g/NR/HnB07vhxwMOlNxFxdOkagWuB69KuzwAD07nsCXwxJXz9gIuAw4CdgTZJO6djrgI+COwGrA8cn9oPA4anbSLws9z5/BAY39MPx8zMrNUUds1ORLwG/AtwhKTBAJJ2AAYBp5MlPSWTgB0kHQBcCJwUEW+nfc8A60l6nyQBY4Cby+OlfZ8Fri6dArChpP5kictbwKtkSVh7RDwVEW8BU4Cx6ZynRwLcDwxJY40Frky77gU2kbRVOmYm8NfefFZmZtaCXNmpi/Vz0zvXN+IEIuJV4GmyighkCc7VwF3ATpK2SP06gS+RVWYej4g7y4b6NVml5u+AB4E3K4TbH1gSEU/kjnkdWEyWMP0oIpYC2wDP5o7rSG3vSNNX44FbUlO3x3RF0kRJsyXNfmnZ8z09zMzMbK3QyAXKy9LUTl61tLAv00XlXo8DjoyITknXkSUwFwFExBxJ84GLK4wxFbiGbIrparKkp1wpkSrZC1gBbA1sCtwl6bdl51NSfv0XA3dGxF0VrqHaMVVFxCTSdN4u79u7eVNzMzOrnQJ9qWCz3Y31Itkv/rzBwAt9EUzSRsAw4HFJu5NVeG7LZpxYF3iKlOwknWlbSUT8WdLbwMeBkylLdtJU1afI1uaUfA64JU2HPS/p98AosgrN0Fy/IcCi3FjfBd4LfDHXp6OrY8zMzIqs2dbszAL2Ld35lO7CGsjKUzQ1IWkQWYXkhoh4iazycmZEDEvb1sA2krbr4ZBnAN+OiBUV9h0M/DEiOnJtzwAHpjupNgT2Af5I9hkMl7S9pHXJqk3T0jkfDxwKtKWptZJpwDFprH2AVyJicQ/P28zMiqhAa3aaqrITEUsknQxMl7QO8Bqr/mKfK6n0fiowF5gg6Yhcn33KEou829Ni4XWA64Hvp/ZxZHc15V2f2s/twbn/oYvd41h5CguyitHlwHyyaajLI2IugKSTgBlAP+CyiFiQjrkE+BNwT6o+XRcRZwHTgcOBduAN4J9KQSTdRTa9NkhSB3BcRMzo7nrMzKy1RRMnJ7XWsGQnIgZVab8RuLHKvmFVhpvcw5jVjicitq/Q9vWy96PL3t8B3FHhuDPL3k+o0Oc1sjVBlc5lOlkCU95e8c8r3Z11YpV9+1dqNzMzK4qmquyYm
ZlZnbiys3aTdB/ZWp+88RExrxHnY2ZmZo3TkslOROzd6HMwMzNragV6NlZLJjtmZmbWjQJNYzXbredmZmZmNeXKjpmZWRG5smNmZmbWGlzZMTMzK6DsK9qKwcmOmZlZEXkay8zMzKw1uLJjZmZWRAWq7DjZsZWMX2943WPefdnyuscEGLHTnxsS96YrzmhI3AlzzmpI3D8fdkLdY7azrO4xAfZfb0hD4l5y+BUNibtZ9kDiutqt36Z1jwmwyWa7NCSu1YaTHTMzswIq0lPPvWbHzMzMWporO2ZmZkVUoMqOkx0zM7MiKs5zQD2NZWZmZq3NlR0zM7MC8gJlMzMzsxbhyo6ZmVkRFaiy42THzMysiLxA2czMzKw1uLJjZmZWQF6gbGZmZtYiXNkxMzMrIq/ZMTMzs1YWnVHzrSckjZH0mKR2Sad20e8oSSFpVG+v1cmOmZmZ1YWkfsBFwGHAzkCbpJ0r9NsI+CpwXy3iNjTZkbRC0pzcNkzSBEkXlvW7o5TZSVooafOy/asc00XMQZJ+JulJSQ9JekDSCWnfMEnLys7pmLRvY0lXpuOeTK83rnDcI2nfgFzM01IG+5ikQ1PbepLul/SwpAWSvpfrv72k+yQ9IekaSeum9o9JelDScklHlV3Xsan/E5KOzbWfLelZSa/15PMxM7OC6OyDrXt7Ae0R8VREvAVMAcZW6Pd94Dzgb2t0bWUaXdlZFhEjctvCOsS8FHgJGB4RI4ExwODc/ifLzunK1P4L4KmI2CEidgCeTmOtdBywGzAE+CxAyljHAbukWBenzPZN4MCI2AMYAYyRtE8a61zgxxExPJ3rcan9GWAC8N/5C5I0GPgusDfZX6TvSto07b4ptZmZmTXaNsCzufcdqe0dkkYCQyPiN7UK2uhkp64k7UD2i//0iOgEiIi/RMS53Rz3AWBPskyz5CxgVBrzHRGxArifd//wxgJTIuLNiHgaaAf2ikyp2jIgbSFJwIHAr9O+K4Aj0tgLI2Iuq+bPhwK3RcTSiHgJuI0ssSIi7o2Ixd19NmZmVizRWftN0kRJs3PbxLKwqnQq7+yU1gF+DHyjltfa6GRn/dx00fV1iLcL8HAp0alih7JprP3J5hXnpEQGeCepmZPGfIek9cgqLLekpqpZrKR+kuYAz5MlK/cBmwEvR8Ty8v5d6DZT7kr+L+f9rz3R08PMzGxt1gfTWBExKSJG5bZJZVE7gKG590OARbn3GwG7AndIWgjsA0zr7SLlRic7+WmsI1NbteXcNf/2I0nfSQlN/oMun8a6iywTrRQ/375DSlxeBJ5JFZhSn3IBWcKUpr6GAHtJ2rWr/l1dyhoc827H3F/OvQYN7+lhZmZmq2sWMDytTV2XbJnHtNLOiHglIjaPiGERMQy4F/hkRMzuTdBGJzuVvAhsWtY2GHihBmM/AuyRymRExNkp2XhPN8ctAEaWjoN3Sm17AI+mptKanQ8A+0j6ZGrvLoslIl4G7iCbenoB2ERS/2r9K+g2hpmZWV5fTGN1GzObtTgJmEH2+3NqRCyQdFbu92bNNWOyMwvYV9KWAKl0NZCVp2nWSES0A7OBH6RFwqVpp0qVkfLjHgJOzzWfDjyY9uX7LgZOBU5LTdOAcZIGStoeGA7cL+m9kjZJ57A+cDDwx4gI4HagdLfVscCN3VzaDOAQSZumhcmHpDYzM7OmEhHTI2LHdMPP2antjIiYVqHv6N5WdaAJk52IWAKcDExP00I/AdrK1tnMldSRtv+X2ibk2jokDakS4niydTHtkh4Afgt8O7e/fM3OV1P7ccCO6RbyJ4EdefcuqXI3ABtI2j8iFgBTyapKtwAnpvU+WwG3S5pLluDdllt5/m3g65La07n+AkDSRyR1AJ8B/lPSgvSZLSVbPD0rbWelNiSdl47ZIH0uZ1Y5ZzMzK5LG3HreEA19XEREDKrSfiNVqhlpDq+SyT2M+SrwxSr7FgLrV9n3EvCFLo7bNfc+yKa4Su/PBs4uO2YuM
LLKeE9R4XbxiJhFNkVV6ZjLgMsqtH8L+FalY8zMzIrAz8YyMzMroJ6ssWkVLZvsSLqPbK1P3viImNeI8zEzM2smTnZaQETs3ehzMDMzs8Zr2WTHzMzMqitSZafp7sYyMzMzqyVXdszMzIoouvyKuZbiZMfMzKyAPI1lZmZm1iJc2TEzMyug6CzONJYrO2ZmZtbSXNkxMzMroCKt2XGyY2ZmVkDhu7GsqO6Jl+se88S2xvwfrvOVxvz1/4flzzUk7p8PO6Ehcbe8+ef1D7rnKfWPCcxbvrQhcU/epDErEg5b8mLdY17auX3dYwIcv2JRQ+JabTjZMTMzK6AiTWN5gbKZmZm1NFd2zMzMCsi3npuZmZm1CFd2zMzMCiii0WdQP052zMzMCsjTWGZmZmYtwpUdMzOzAnJlx8zMzKxFuLJjZmZWQF6gbGZmZi3N01hmZmZmLcKVHTMzswIq0lPPXdkxMzOzllb3ZEfSCklzctswSRMkXVjW7w5Jo9LrhZI2L9u/yjFdxNxY0pWSnkzblZI2zu3fRdLvJD0u6QlJ/yZJuTh/See6QNKvJW2Q9p0pKSR9IDfW11Jb6dzbJM2TNFfSLaXrSMc+l/scDs+NcZqkdkmPSTo0136ZpOclzS+7vsGSbkvnfpukTcv2fyR97kf15PMyM7PWF52135pVIyo7yyJiRG5bWIeYvwCeiogdImIH4GngUgBJ6wPTgHMiYkdgD+DvgC/njr8mnesuwFvA0bl984BxufdHAY+ksfsDPwUOiIjdgbnASbm+P859DtPTMTun8XYBxgAXS+qX+k9ObeVOBWZGxHBgZnpPGq8fcC4wo9tPyczMCqMzVPOtWbX8NFaquuwJfD/XfBYwStIOwOeA30fErQAR8QZZQnJqhbH6AxsCL+WabwDGpv3vB14B/lI6JG0bpkrRe4BF3ZzyWGBKRLwZEU8D7cBe6dzuBJZWOeaK9PoK4Ijcvq8A1wLPdxPXzMysJTUi2Vk/N3VzfR3i7QzMiYgVpYb0eg5Z9WQX4IH8ARHxJDBI0ntS09GS5gDPAYOBm3LdXwWelbQr0AZckxvnbeBLZNWfRelcfpE79qQ0vXVZbuppG+DZXJ+O1NaV90XE4hRzMbAFgKRtgCMIullZAAAgAElEQVSBS7o6WNJESbMlzV742jPdhDIzs1YQoZpvzarR01hHprZqX21Ui688UpVxSu3V9ufjXxMRI4AtyRKXb5b1m0I29XQE8E4CJ2kAWbIzEtiabBrrtLT7Z8AOwAhgMXB+7ryqncfq+gnw7XyiV0lETIqIURExatigbdcwlJmZWXNqlmmsF4FNy9oGAy/UYOwFwEhJ71xrer0H8GjaPyp/QJqOei0i/ppvj4ggq+p8rCzGTcB44JmIeDXXPiId92Q6dirZeiAiYklErIiITuDnpKkqskrO0NwYQ+h+6muJpK3SuW/Fu1NWo4ApkhaSrSW6WNIRlYcwM7MiiU7VfGtWzZLszAL2lbQlQLqTaSArT+eskYhoBx4CTs81nw48mPZdBewn6eAUe33gAuC8KkPuBzxZFmMZ8G3g7LK+zwE7S3pvev9xsgSrlJSUHAmU7rCaBoyTNFDS9sBw4P5uLnMacGx6fSxwYzqv7SNiWEQMA34NfDkibuhmLDMzs5bSFF8qGBFLJJ0MTE9Vl9eAtlT1KJkrqfR+KtmU0ISySsU+EdFRIcRxwH9IaiebJrontRERyySNTfsvAvoBvwTyt7UfLWk/suSwA5hQ4RqmVGhbJOl7wJ2S3gb+lDv2PEkjyKaoFgJfTMcskDSV7I6u5cCJpWkoSVcDo4HNJXUA342IXwDnAFMlHQc8A3ymwmdgZmb2Dj8bqw9FxKAq7TeSKhIV9g2rMtzkHsZ8CfhCF/vnkSURlfZNrhYnIs6s0j469/oSKiwQjojxXZzP2axaJSIi2qr0fxE4qNp4qc+ErvabmVmxNPO0U601yzSWmZmZWZ9oimmsWpF0H9lan7zxqXJjZ
mZmSTN/CWCttVSyExF7N/oczMzMrLm0VLJjZmZmPdPMXwJYa052zMzMCqhId2N5gbKZmZm1NFd2zMzMCqhIC5Rd2TEzM7O6kTRG0mOS2iWdWmH/1yU9kh6UPVPSdr2N6WTHzMysgBrx1HNJ/YCLgMOAnYE2STuXdXsIGBURu5M96qja45t6zMmOmZlZAUXUfuuBvYD2iHgqIt4CpgBjVz6vuD0i3khv7yV7IHavONkxMzOzetmGlR/y3ZHaqjkOuLm3Qb1A2czMrID6YoGypInAxFzTpIiYlO9S4bCKNSFJXwBGAX/f2/NysmMr+ee33lP3mN+9obP7Tn3gLTZoSNzNozH/t2tnWUPisucpdQ95+QM/qntMgAd2r/+1Akx7tTF/lyetU/9/L5YyoO4xAW7a7L0Nibu2SYnNpC66dABDc++HAIvKO0k6GPgO8PcR8WZvz8vJjpmZWQE16BuUZwHDJW0PPAeMAz6X7yBpJPCfwJiIeL4WQb1mx8zMzOoiIpYDJwEzgEeBqRGxQNJZkj6Zuv0QGAT8StIcSdN6G9eVHTMzswJq1JcKRsR0YHpZ2xm51wfXOqaTHTMzswIq0KOxPI1lZmZmrc2VHTMzswLys7HMzMzMWoQrO2ZmZgXUoFvPG8LJjpmZWQE15utcG8PTWGZmZtbSXNkxMzMroKj4mKrW5MqOmZmZtTRXdszMzAqos0DfKuhkx8zMrIA6PY1lZmZm1hqc7FQhKSSdn3t/iqQzJX0nPYV1jqQVuddfrTLOmZKey/WbI2kTSaMlvZJr+23umGMkzZe0QNIjkk5J7Z9JbZ2SRuX6ryvpcknzJD0saXRu3x2SHsvF2aJPPjAzM1urBKr51qw8jVXdm8CnJP17RLxQaoyIs4GzASS9FhEjejDWjyPiR/kGSQB3RcQ/lrUfBvwf4JCIWCRpPWB82j0f+BTwn2Xjn5DObbeUzNws6SMRUfoahc9HxOwenKeZmVnLcWWnuuXAJOBrdY57GnBKRCwCiIi/RcTP0+tHI+KxCsfsDMxMfZ4HXgZGVehXkaSJkmZLmj3jjfZeX4CZmTW/zj7YmpWTna5dBHxe0sa9HOdruWmk23Pt++fav5PadgUeWM3xHwbGSuovaXtgT2Bobv/lKca/KZWU8iJiUkSMiohRh27wgdUMbWZm1tw8jdWFiHhV0pXAV4FlvRhqlWmsZJVprDV0GfAhYDbwJ+APZJUpyKawnpO0EXAt2ZTYlTWIaWZma7FmXmNTa67sdO8nwHHAhnWKt4CsMtNjEbE8Ir4WESMiYiywCfBE2vdc+vlX4L+BvWp8vmZmthbyNJa9IyKWAlPJEp56+HfgPElbAkgaWO1OrxJJG0jaML3+OLA8Ih5J01qbp/YBwD+SLXI2MzMrDE9j9cz5wEm9OP5rkr6Qe39EtY4RMV3S+4DfpvU1QTZNhaQjgf8A3gv8j6Q5EXEosAUwQ1In8Bzv3r01MLUPAPoBvwV+3ovrMDOzFtHMlZhac7JTRUQMyr1eAmzQVZ8uxjkTOLPCroXAHVWOuRy4vEL79cD1FdoXAjtVaH+d1ZwSMzMzazVOdszMzAqoSAuUnezUSLp1/DNlzb9KX0JoZmbWVDqLk+s42amV/Dcrm5mZWfNwsmNmZlZAfuq5mZmZWYtwZcfMzKyAotEnUEdOdszMzAqoSN+z42ksMzMza2mu7JiZmRVQp7xA2czMzKwluLJjZmZWQF6gbIW1zwcX1T3mvz66rO4xAdSg75gYoH4Nibv/ekMaEnfe8qV1j/nA7qfUPSbAnnN/1JC4Z4z8ckPibtp/87rH3LGzMf9ebPuvIxoS12rDyY6ZmVkBFeluLCc7ZmZmBVSkZ2N5gbKZmZm1NFd2zMzMCsjPxjIzMzNrEa7smJmZFZBvPTczM7OW5gXKZmZmZi3ClR0zM7MCKtL37LiyY2ZmZi3NlR0zM7MC8gJlMzMza2leoGxmZmbWBySNkfSYpHZJp1bYP1DSNWn/fZKG9
Tamkx0zM7MC6uyDrTuS+gEXAYcBOwNtknYu63Yc8FJEfAD4MXDuGl9k4mTHzMzM6mUvoD0inoqIt4ApwNiyPmOBK9LrXwMHSerVpFuPkh1JR0oKSR9M79eRdIGk+ZLmSZolafsujl8o6a6ytjmS5vfm5LuI11/SC5L+vcbjvlal/V8kHZNeT5b0hqSNcvt/mj6/zWt5Pj0l6V8bEdfMzJpXIyo7wDbAs7n3HamtYp+IWA68Amy2WhdXpqeVnTbgbmBcen80sDWwe0TsBhwJvNzNGBtJGgog6UNrcK6r4xDgMeCzvc0GeyIiLomIK3NN7aRMVdI6wAHAc319Hl1wsmNmZn1O0kRJs3PbxPIuFQ4rvzGsJ31WS7fJjqRBwL5kc2ilZGcrYHFEdAJEREdEvNTNUFPJkiTIkqerczH6SfphqhDNlfTFUmxJMyU9mCpIpQRimKRHJf1c0gJJt0paPxerDfgp8AywTy7OQkn/V9I96Q/hw5JmSHpS0r+kPqMl3SnpekmPSLokJSylMc6W9LCkeyW9L7WdKemUXPyrc9c6Gvg9sDw3xtdTVWy+pP+Tu6Y/Sro0tV8l6WBJv5f0hKS9Ur8NJV2WPquHcp/JBEnXSbol9T8vtZ8DrJ8qaVd182dkZmYFEeqDLWJSRIzKbZPKwnYAQ3PvhwCLqvWR1B/YGFjam2vtSWXnCOCWiHgcWCrpw2SJyyfSL9DzJY3swTi/Bj6VXn8CuCm37zjglYj4CPAR4IQ0LfY34MiI+DBZdeT8XKVmOHBRROxCVlX6NEBKeg4CfkOWdLSVncezEfFR4C5gMnAUWUJ0Vq7PXsA3gN2AHXLnvSFwb0TsAdwJnFDlWp8A3itp0xR/SmmHpD2BfwL2TnFPyH1+HyBL0nYHPgh8DtgPOIV3qzPfAX6XPqsDgB9K2jDtG0GWZO0GHC1paEScCiyLiBER8flKJ5vPxH+5qPzvnJmZtaIGTWPNAoZL2l7SumRFlGllfaYBx6bXR5H9zuvbyg4r/7KeArRFRAewE3Aa2fXNlHRQN+MsBV6SNA54FHgjt+8Q4BhJc4D7yObmhpOVsv6vpLnAb8nm8d6Xjnk6Iuak1w8Aw9LrfwRuj4g3gGuBI9Pq75LShzoPuC8i/hoRfwH+JmmTtO/+tHhqBVnCtF9qf4ssiSqPWcl1ZH+Ie5MlViX7AddHxOsR8Vrqt3/umualitkCYGb6A56Xi3UIcGr6rO4A1gO2TftmRsQrEfE34BFguy7O7x35THz81lv35BAzM7PVltbgnATMIMsFpkbEAklnSfpk6vYLYDNJ7cDXgVVuT19dXX6poKTNgAOBXSUF0A8ISd+KiDeBm4GbJS0hqwDN7CbeNWS3nE0oDwV8JSJmlMWfALwX2DMi3pa0kOyXO8Cbua4rgNI0Vhuwb+oLWeJ0AFmylD+us2yMTt79PMozyNL7t3PZ5Qq6/vymAA8CV0REZ27pUFdriMrPJ3+upVgCPh0Rj+UPlLQ3q34m/tJIMzOrqFHPxoqI6cD0srYzcq//BnymljG7q+wcBVwZEdtFxLCIGAo8DXxM0tbwzgLc3YE/9SDe9cB5ZBld3gzgS5IGpDF3TFMzGwPPp0TnALqpVEh6D1nlZNt0vsOAE1l1Kqs7e6US2zpk00J3r+bxRMQzZFNOF5ftuhM4QtIG6RqPZOXKT3dmAF8pTef1cArx7dJna2ZmVjTd/Zd/G3BOWdu1ZGtdlkoamNruBy7sLlhE/JX05UBlN0ldSjZN82D6Jf4XskrRVcBNkmYDc4A/dhPiU2Rze/kKx43Aeblz7Yl7yK57N7Lk5PrVOPYdEfGfFdoelDSZ7DMDuDQiHlLPvyHy+8BPgLnps1pINnXXlUmp/4PV1u2YmVmxFOnZWOrlmp+WI2k0cEpEdJdAtKQlo0fX/S/EgY8uq3dIANTljGLfGbDSErL62X+9IQ2JO295r26iWCP/3rlR9536wJ5zf9SQuP8w8ssNifu5zvp/ddiOn
Y3592LkuTs2JO76x/x7n/1D9dNtv1Dzf+9Pfua/mvKJW/4GZTMzM2tpNV3AKuk+oHy6aHxEzKtlnL4UEXeQ3eVkZmbWshq1QLkRaprsRMTetRzPzMzMrLd8a7KZmVkBubJjZmZmLa1Ityd5gbKZmZm1NFd2zMzMCqizKW8S7xuu7JiZmVlLc2XHzMysgIq0QNmVHTMzM2tpruyYmZkVUJHuxnKyYyv51BP1L/b94ZBBdY8JNKyuud43T21I3EsOv6IhcU/epP4f9LRXN6h7TIAzGvSMqv956OKGxJ26+xl1j/njdZfXPSbA/FNmNiTuo8f03didBUp3PI1lZmZmLc2VHTMzswLyAmUzMzOzFuHKjpmZWQEVZ8WOkx0zM7NC8jSWmZmZWYtwZcfMzKyA/GwsMzMzsxbhyo6ZmVkBFelLBZ3smJmZFVBxUh1PY5mZmVmLc2XHzMysgHzruZmZmVmLcGXHzMysgLxA2czMzFpacVKdtWgaS9KRkkLSB9P7dSRdIGm+pHmSZknavovjF6Z+8yQ9IukHkgaW9fmapL9J2ji930LS05K2zPW5WNKpkkan8zkut29kajslvb9G0py0LZQ0J7UPkHRFOpdHJZ2WG2OMpMcktUs6Ndd+VWqfL+kySQNSu9Ln0C5prqQP5465RdLLkn6z5p+8mZnZ2m2tSXaANuBuYFx6fzSwNbB7ROwGHAm83M0YB6S+ewHvByZViDErjUVEPA+cC/wIICUS+wHnp/7z0nmUjAMeLr2JiKMjYkREjACuBa5Luz4DDEznsifwRUnDJPUDLgIOA3YG2iTtnI65CvggsBuwPnB8aj8MGJ62icDPcufzQ2B8N5+JmZkVUGcfbM1qrUh2JA0C9gWO491kZytgcUR0AkRER0S81JPxIuI14F+AIyQNTjF2AAYBp5MlPSWTgB0kHQBcCJwUEW+nfc8A60l6nyQBY4CbK5y/gM8CV5dOAdhQUn+yxOUt4FWyJKw9Ip6KiLeAKcDYdM7TIwHuB4akscYCV6Zd9wKbSNoqHTMT+GtPPhMzM7NWtVYkO8ARwC0R8TiwNFVYpgKfSFNE50sauToDRsSrwNNkFRHIEpyrgbuAnSRtkfp1Al8iq8w8HhF3lg31a7JKzd8BDwJvVgi3P7AkIp7IHfM6sJgsYfpRRCwFtgGezR3XkdrekaavxgO3pKZuj+mOpImSZkua/efXF63OoWZmtpbqJGq+Nau1JdlpI6tykH62RUQHsBNwGln1bKakg1Zz3Pxj0MYBU1Jycx1ZAgNARMwB5gMXVxhjaupbSpaqnX9+317ACrJpuO2Bb0h6f9n5vBO+7P3FwJ0RcVeFa6h2TJciYlJEjIqIUVtuuPXqHGpmZtb0mv5uLEmbAQcCu0oKoB8Qkr4VEW+STRvdLGkJWQVoZg/H3QgYBjwuaXeyCs9t2YwT6wJPka2fKak4JRkRf5b0NvBx4GSyCk8+Tn/gU2Rrc0o+R1apeht4XtLvgVFkFZqhuX5DgEW5sb4LvBf4Yq5PR1fHmJmZVdK8dZjaWxsqO0eRrUnZLiKGRcRQsumnj0naGrI7s4DdgT/1ZMC0Buhi4Ia0zqcNODONPywitga2kbRdD8/xDODbEbGiwr6DgT+mSlTJM8CB6U6qDYF9gD+SLY4eLml7SeuSVZumpXM+HjiUrKqVT7qmAceksfYBXomIxT08bzMzK6giLVBu+soOWSJyTlnbtcBksvU7pdvH7ydbQNyV29Ni4XWA64Hvp/ZxZHc15V2f2s/t7gQj4g9d7B7HqtNbFwGXk02NCbg8IuYCSDoJmEFWwbosIhakYy4hS+buSdWn6yLiLGA6cDjQDrwB/FMpiKS7yO7gGiSpAzguImZ0dz1mZmatpOmTnYgYXaHtAuCC1RxnWBf7Vvl+noj4elfnERF3AHdUOO7MsvcTKvR5jdyaoLJ908kSmPL2in9W6e6sE6vs279Su5mZWRRoImttmMYyMzMzW2NNX9lZXZLuA
waWNY+PiHmNOB8zM7Nm1MxrbGqt5ZKdiNi70edgZmbW7Jr5e3FqzdNYZmZm1tJarrJjZmZm3StOXceVHTMzM2txTnbMzMwKqNmejSVpsKTbJD2Rfm5aoc8ISfdIWiBprqSjezK2kx0zM7MCasJvUD4VmBkRw8ke/XRqhT5vAMdExC7AGOAnkjbpbmAnO2ZmZtYMxgJXpNdXkD3vciUR8XhEPJFeLwKeJ3tmZJec7JiZmRVQ9MH/JE2UNDu3TVyNU3pf6dmO6ecWXXWWtBfZg7uf7G5g341lZmZmNRERk4BJ1fZL+i2wZYVd31mdOJK2An4JHFv2cOyKnOyYmZkVUCO+QTkiDq62T9ISSVtFxOKUzDxfpd97gP8BTo+Ie3sS18mOreTj/beqe8wpt6vuMQHWb9CXTAy8dXJD4m6mxnzOhy15se4xJ63znrrHBNi0/+YNiTt19zMaEvezc8+qe8zv79yjm29q7l8H7NSQuAUzDTgWOCf9vLG8g6R1geuBKyPiVz0d2Gt2zMzMCqgv1uz00jnAxyU9AXw8vUfSKEmXpj6fBT4GTJA0J20juhvYlR0zM7MCarYHgUbEi8BBFdpnA8en1/8F/Nfqju3KjpmZmbU0V3bMzMwKqDOK83QsV3bMzMyspbmyY2ZmVkDFqes42TEzMyuk3j64c23iaSwzMzNraa7smJmZFVANvhdnreHKjpmZmbU0V3bMzMwKqNm+VLAvOdkxMzMrIC9QNjMzM2sRruyYmZkVkBcoNzlJR0oKSR9M79eRdIGk+ZLmSZolafsujl+Y+pWemPp3kkZL+k1Zv8mSjkqv75A0Kr0eJukJSYdK2kDSVWm8+ZLuljQo9Rsj6TFJ7ZJOzY17UmoLSZvn2pWuo13SXEkfzu27RdLLFc6x2lifT2PMlfQHSXus6edtZma2NltbKzttwN3AOOBM4Ghga2D3iOiUNAR4vZsxDoiIF0pvJI3uSeA09gzgGxExQ9JpwJKI2C3t3wl4W1I/4CKyx9R3ALMkTYuIR4DfA78B7igb/jBgeNr2Bn6WfgL8ENgA+GLZMdXGehr4+4h4SdJhwKTcWGZmVnBFWqC81lV2UtVkX+A4smQHYCtgcUR0AkRER0S81AfhtwRuBU6PiGm52M+VOkTEYxHxJrAX0B4RT0XEW8AUYGzq81BELKww/ljgysjcC2wiaat0zEzgr+UHVBsrIv6Q+wzuBYasyQWbmZmt7da6ZAc4ArglIh4HlqapnqnAJ9KU1PmSRvZgnNtT//tWI/aVwIUR8atc22XAtyXdI+kHkoan9m2AZ3P9OlJbV9bkmJ44Dri52k5JEyXNljT7gdfaaxDOzMyaXUTUfGtWa2Oy00ZWJSH9bIuIDmAn4DSyytxMSQd1M84BETEiIkpTO9X+lPLtvwXGS9rgnZ0Rc4D3k00zDSabrvoQoG7GqmRNjul6QOkAsmTn29X6RMSkiBgVEaP2HPSB3oQzM7O1RCdR861ZrVVrdiRtBhwI7CopgH5ASPpWmjq6GbhZ0hKyCtDM1Rj+RWDTsrbBwAu59+cBXwB+JWlsRCwHiIjXgOuA6yR1AocDfwCG5o4dAizq5hw61uCYqiTtDlwKHBYRL67pOGZmZmuzta2ycxTZmpbtImJYRAwlW4j7MUlbQ3ZnFrA78KfVHPsJYOtUlUHSdsAewJyyfl8DXgV+ke6e2lfSpumYdYGdU+xZwHBJ26f2ccA0ujYNOCaNuw/wSkQsXs3rIJ3LtmQJ2Pg05WdmZvaOzj7YmtXaluy0AdeXtV0LTAZukjQfmAssBy5cnYFTZegLwOWS5gC/Bo6PiFfK+gVwLNnC5POAHYD/lTQPeAiYDVybqj4nkd259SgwNSIWAEj6qqQOssrNXEmXpuGnA08B7cDPgS+X4kq6C/gVcJCkDkmHdjPWGcBmwMVpbdLs1fk8zMzMWsVaNY0VEaMrtF0AXLCa4wyr0v57YJ/uYqe7qw7J7b6yyjHTyRKY8vaK55wSq
ROrjLV/lfZqYx0PHF/pGDMzsyJ9qeBaleyYmZlZbTTzguJaa+lkJ91WPrCseXxEzGvE+ZiZmVn9tXSyk7ut3MzMzHKa+Xtxam1tW6BsZmZmtlpaurJjZmZmlTXzreK15mTHzMysgIp0N5ansczMzKylubJjZmZWQEW69dyVHTMzM2tpruyYmZkVkG89NzMzM2sRruyYmZkVUJHW7DjZsZUc1e+V7jvV2Pkqf6LH/2/vvOPlqsq2fd0JRToEpYMBaQpCkAgoonRRQQSkBPVFpbx+WCiiAmJFEQQERJEXAY0NDChFlA4ioJTQCQHpHVEQkV5yf3+sNWQymTkn5Mza+5yZ58pvfpm99sy+18zsM/PsZz2lGm568R+16I6ZY75adN8+epFadE+ctnzlmk8yZ+WaACtPe74W3aPmeqUW3YPftmPlmrfc9tvKNQHu3WDPWnRLEqnnQRAEQRAEPUJ4doIgCIKgD5kWAcpBEARBEAS9QXh2giAIgqAP6R+/Thg7QRAEQdCX9FM2VixjBUEQBEHQ04RnJwiCIAj6kPDsBEEQBEEQ9Ajh2QmCIAiCPqSfemOFsRMEQRAEfUgsYwVBEARBEPQIYewEQRAEQR/iAv+GgqQxki6UdGf+v2NDP0kLSnpY0o9m5dhh7ARBEARBMBzYH7jY9krAxXm7EwcDl83qgcPYCYIgCII+xHbXb0Nka2Bivj8R+Ei7B0laG1gcuGBWDxzGThAEQRAEXUHSHpImN932eB1PX9z2owD5/8XaHH8UcCTwpdczrxFn7EhaQtKpku6WdJukSyU9J+lGSU9Kujffv6jD88dKej4/5jZJv5A0Z8tjjslrgaNaxj+QP7ypkm6XdEQeX0XSn/Mxp0o6IY/PKWmipFvy+AF5fNk876mSpkjaq0mj7ZqlpFUl/U3Si5L2a5nXFpLukHSXpJncfpKOlfTM7L3jQRAEQS8yDXf9ZvsE2+Obbic0a0q6SNKtbW5bz+K09wT+ZPvB1/NaR1TquSQBZwATbe+Ux8YBC9i+XNLPgXNsnz7Ioe62PU7SaOBCYAfg1/l4o4BtgAeB9wJ/zuOrAz8CPmT7dklzAA2L9YfAUbbPyo99ex7fHpjb9tslzQvcJukU4EXgi7avl7QAcJ2kC23fxvQ1y0Oz4bI/8BXgSeALtLj18mv4MbAZ8BBwraSz87GQNB5YeJbe4CAIgqBvqKPOju1NO+2T9A9JS9p+VNKSwONtHvYuYANJewLzA3NJesb2QPE9I86zsxHwsu3jGwO2b7R9+ewczParwDXA0i0atwI/ASY0jX8Z+K7t2/NzX7F9XN63JMnQaBz3lsZdYL5sGM0DvAQ8bftR29fnx/4XmNo0h7ZrlrYft30t8HLLy1gHuMv2PbZfAk7Nx2gYQofnuXek2e142tMPDPTQIAiCICjF2cAu+f4uwFmtD7D9MdvL2R4L7Af8YjBDB0aesbM6cF23DibpDcC6wHlNwxOAU0gepC2blrgG0j4KuETSuZL2kdTwpJwOPAs8CjwAHGH7yZY5jAXWAq7OQ4OuWbawNMkL1eAhphtOnwPObhyvE81ux+0XXG4QuSAIgqAXKLGMNUQOBTaTdCdpteJQSCsUkk4cyoFHmrHTLd4i6UbgCeAB2zcDSJoL+CBwpu2nSQbI5oMdzPbPgLcCpwEbAldJmpvkdXkVWApYHviipBUaz5M0P/A7YO+sNzuo3ZQkLUVaRjt2No8bBEEQBJVh+wnbm9heKf//ZB6fbHu3No//ue3PzcqxR5qxMwVYuwvHudv2OGBFYD1JH87jWwALAbdIug94D9OXsgbUtv2I7ZNtbw28QvIE7QycZ/tl248DVwLjIQUvkwydX9v+fdOh/pHXKhlgzbKZh4Blm7aXAR4heYtWBO7Kr2VeSXcNcqwgCIKgTxhuRQVLMtKMnUuAuSXt3hiQ9E5J75udg+Xlnf2BA/LQBGA322PzeuDywOY5uPhw4EBJK2fdUZL2zfe3aCx3SVoCW
BR4mLR0tbES8wHrAbfnQOuTgKm2f9AyrUHXLFu4FlhJ0vLZM7UTaenqj7aXaHotz9le8XW/SVgktkAAACAASURBVEEQBEFPMs3u+m24MqKMHafQ8W1Ia3p3S5oCfJPkyZhdziR5Pd4HvB/4Y5Pes8AVwFZ5qWtv4BRJU0lBzEvmh24O3CrpJuB84Eu2HyNlSc2fH3st8LN8nPWBT5AMoRvz7YP5WJ3WLJeQ9BCwL3CQpIckLWj7FVJszvmkQOdJtqcM4f0IgiAIgp5iRKWeQ1ouIqWKt9v3yVl4/n2kJabGtoE18+aYNo/ftun+OcA5bR6zL8kIaR1/hhQ30zp+Be1jbbD9BLBJm/HHSEtU7Z7zJ+BP7fY1PWb+gfYHQRAE/cVwXnbqNiPKsxMEQRAEQfB6GXGenVklF/b7Zcvwi7bXrWM+QRAEQTCcGM4xNt2mZ42dXNhvXN3zCIIgCILhSCxjBUEQBEEQ9Ag969kJgiAIgqAz/bSMFZ6dIAiCIAh6mvDsBEEQBEEfEjE7QRAEQRAEPUJ4doIgCIKgD+mnmJ0wdoIgCIKgD4llrCAIgiAIgh4hPDvBDIzdfq7KNX999NWVawKMUj22/txzzFmL7sKLrlaL7m6vDqVP7+zxh0XfVLkmwHIH1lPH9Nb9Lq5F98A5V6lc894N9qxcE2D5y4+rRbck9rS6p1AZ4dkJgiAIgqCnCc9OEARBEPQh0/ooZieMnSAIgiDoQ9xH2VixjBUEQRAEQU8Tnp0gCIIg6EP6aRkrPDtBEARBEPQ04dkJgiAIgj6kn2J2wtgJgiAIgj6kn9pFxDJWEARBEAQ9TXh2giAIgqAPid5YQRAEQRAEPUJ4doIgCIKgD+mnAOXw7ARBEARB0NOEZycIgiAI+pAoKhhUgqQlJJ0q6W5Jt0n6k6SVJd3a4fFzSPqXpO+1jG8p6QZJN+Xj/G8eX0XSnyXdKGmqpBOqeF1BEATB8Md212/DlfDs1IQkAWcAE23vlMfGAYsP8LTNgTuAHSQdaNuS5gROANax/ZCkuYGx+fE/BI6yfVY+/tvLvJogCIIgGL6EZ6c+NgJetn18Y8D2jcCDAzxnAnAM8ACwXh5bgGS0PpGP8aLtO/K+JYGHmo5/S9dmHwRBEIxoptldvw1Xwtipj9WB62b1wZLmATYBzgFOIRk+2H4SOBu4X9Ipkj4mqfG5HgVcIulcSftIWrirryAIgiAIRgBh7IwctgQutf0c8DtgG0mjAWzvRjKErgH2A07O4z8D3gqcBmwIXJWXuWZA0h6SJkuafPKN91bxWoIgCIKa6aeYnTB26mMKsPbrePwEYFNJ95E8QouSlsKAtERl+yhgM2C7pvFHbJ9se2vgFZJHaQZsn2B7vO3xnx63/Gy9mCAIgmBkMQ13/TZcCWOnPi4B5pa0e2NA0juBN7c+UNKCwHuA5WyPtT0W+CwwQdL8kjZsevg44P78vC1yADOSliAZSA+XeTlBEARBMDyJbKyayJlU2wBHS9ofeAG4D9gbWEXSQ00PPwa4xPaLTWNnAd8H9gW+LOn/gOeBZ4FP5sdsDhwj6YW8/SXbjxV6SUEQBMEIYjgvO3WbMHZqxPYjwA5tds05C899EnhT3vxgh8fsSzKGgiAIgqBvCWMnCIIgCPqQ4Zwq3m3C2AmCIAiCPsTDOKC420SAchAEQRAEPU14doIgCIKgD+mnZazw7ARBEARB0NOEZycIgiAI+pB+Sj0Pz04QBEEQBLUjaYykCyXdmf9fpMPjlpN0gaSpkm6TNHawY4exEwRBEAR9iAv8GyL7AxfbXgm4OG+34xfA4bbfCqwDPD7YgWMZKwiCIAj6kGG4jLU1qWk1wETgz8BXmh8g6W3AHLYvBLD9zKwcODw7QRAEQRB0BUl7SJrcdNvjdTx9cduPAuT/F2vzmJWBpyT9XtINkg6XNHqwA4dnJwiCIAj6kBKeHdsnACd02i/pImCJNru+OosScwAbAGsBDwC/JfWDPGmwJwVBE
ARBEBTH9qad9kn6h6QlbT8qaUnax+I8BNxg+578nDOB9RjE2IllrCAIgiDoQ1zgNkTOBnbJ93cBzmrzmGuBRSQ1GmFvDNw22IE1DAOUghGKpD2yCzN0e0gzdHtXM3R7V3MkImlRYBKwHGmJanvbT0oaD3zG9m75cZsBRwICrgP2sP3SgMcOYyfoFpIm2x4fur2lGbq9qxm6vasZzEgsYwVBEARB0NOEsRMEQRAEQU8Txk7QTepak+4n3X56rf2m20+vtd90I16nZiJmJwiCIAiCniY8O0EQBEEQ9DRh7ARBEARB0NOEsRMEQRAEQU8Txk4wZJT4uKSv5+3lJK1TWPMQSQs3bS8i6TsF9RYe/FHVkgtr9TSSPlz3HEoi6ZezMlbRXHr+fKoDSXNKWktSu6aWQUWEsRN0g+OAdwET8vZ/gR8X1vyA7acaG7b/DXywoN6/JF0kaddhZPgM2AumFA2jtsBxt225bQec0NguoZl139iy/XFJP8zdm1VKN7Nai/ZoYO3Cmp0ocj5JerukqyQ9KOkESYs07bumhOYg8/l74eMfL2m1fH8h4CbgF8ANkiYM+OSgGNEINOgG69p+h6QbIBkekuYqrDla0ty2XwSQNA8wd0G9qcDRJIPu+5KuAE4BzrL9fClRSWd32gUsWkp3EHYDvl3guJOA80jN/xpGxnzAVqS2O78voAlwAfAOAEkHkToq/wbYEngrsE+3BSUdABwIzCPp6cYw8BIF05RrOp9+AnwTuIp07lwh6cO27wbmLKQJgKT/Mr1lU+OcmrcxbnvBArIb2P5Mvv8p4O+2PyJpCeBc0vdGUDFh7ATd4OV8RWqA3KBtWmHNXwEXS/pZ1v00MLGg3su2zwHOyYbVVsBOwI8lnW9750K6GwAfB55pGRdQbKmw6Qd4pl3APIVk3wUcSmr0d7xtS9rQ9qcK6TVo9t5sS/qxelbSb4DrSwja/h7wPUnfs31ACY0O1HE+zW/7vHz/CEnXAedJ+gRd6R05ID8HFgK+ZPsfAJLutb18Qc3mHk2bAacB2H6svKMw6EQYO0E3+CFwBrCYpO8CHwUOKilo+/uSbgY2JX1RH2z7/IKSr31LZU/OJGBSdlN/pKDuVcBzti+baULSHQV1nwLe2fiBaNF9sISg7Wtz3MjngUskfYXyP4aQvCtrkZb1R9t+Ns/nZUmvlhS2fYCkpYE30/R9bPsvhSTrOJ8kaSHb/wGwfWleovwdMKaQJlnr85LWBk6RdCbwI8qfU09J2hJ4GFgf2BVA0hyUu1AIBiGKCgZdQdKqwCYko+Bi21MLao0Gzre9aSmNNpr72T6iKr26ycHeZ9ueKaZC0mG2v1JYf2ngKGC87RUKa13aMrSz7UdzB+bzSzZwlHQoyUN4G9AwrGy7ZwKzJe0M3GP7qpbx5YCv2d69gjmMAj4HbA+8xfZSBbVWJl0ALgEcbfvnefz9wOa2v1hKO+hMGDvBkJHU7ursv7ZfLqh5NvCJxtViryNpcWBp0lXpI+08LkF3yUb13LafK6hxB7BGI/asKvr1fJK0JLCW7T/VPZegWmIZK+gG1wPLAv8meXYWBh6V9Diwu+3rCmi+ANwi6ULg2cag7S8U0GpkVRxAWrJ6Ux5+HDgLOLQ5M6zLumuRAjwXIrnFAZaR9BSwp+0iMSVZeyFgC5p+FEmejlKvtZb3uEl/POk8fgW40/btQDFDJ3MPKUi3EmOnzvOpw3xOsL1HYY1Vga1pOo9z3E4R77Ok3YE/274zZ/OdDGwH3AfsYvuGErrBwISxE3SD84AzGjEzkjYn/UhOIqWlr1tA84/5VhWTgEuADW0/BpCzK3YhBSCWqlHyM+B/bV/dPChpvbxvzRKikv4H+AYpU6nxo7gRcIikb9n+RQHZWt5jSe8DjiTFKa0NXAksIullkvewSIxS5jngRkkX02TwlDLaqeF86uD5hXRhVLJcBDnuawJwKtBYkl2GFMNzqu1DC8juRQqMJmuvASwPrEVa3tqgg
GYwCLGMFQwZSZNb4xoaY5JutD2ukO48wHK2SwbqNrTusL3K693XBd07ba/UYd9dtlcspHsHqaTAUy3jiwBX2165hGZN7/ENpFiKf0paHviB7W1ysPSXbG9eQjdr79Ju3HaRzMI6zqcc5H0/M2a9OW8vbbtYmYpcU2e11iX1XBpjSqf3Yoiar33n5Yy+q20fk7evt/2ObmsGgxOenaAbPJmvoE7N2zsC/84xD0VS0CVtBRwBzAUsL2kc8O2CgZ33S/oyMLEphXVx4JNAySv/cyX9kVSUrKGzLPA/JI9aKUT7rJVpzPij1U3qeo9H2/5nvv8AKTMK2xdKOrqgLrYnVmm0U8/5dA+wie0HWneUyuxrYhqwFMnYamZJypXHmJZjg/5NStr4btO+yMaqiTB2gm6wM2nJ48y8fUUeGw3sUEjzm6S6IH8GsH1jviovxY7A/sBlml72/R/A2ZR7jdj+gqQPMD3mQMBDwI8LB1l+F7he0gVM/1FcjrSUdHAhzVreY2CypJOAi0nv858BJM1LOoeLUbXRXtP5dDSwCMmQbOX7hTQb7E2qx3UnM57HK5Kys0rwdWAy6dw52/YUeG259J5CmsEgxDJWMCSy9+ZQ21+qWPdq2+tKusH2WnnsZttrVDmPXiYvWb2fGX8Uz3dqzdEzSJoT2B14G6m0/8m2X80el8Vst3oFuql9HbAxKaC1cR7fYvvtpTT7jZx2vg4znsfX2i5WQynX1Fmg+W+lYTzb/m8p3aAz4dkJhkT+Uaijl8+tuX7HaEkrAV8A/lrDPJD0Kds/K3Ts0aQS+8sA59r+a9O+g2wXa36av6hPHfSBFVDyPc7xHMe1GX+emZc/us0rtv+jGSvrFrsCrfN86jCfzWxfWFLD9jRSMcVW7fltt1aS7pbmK6RlLHJG1kYkb/dWwOIlNIOBiUagQTe4QdLZkj6hpkaOhTU/T2qi+CKpj9F/SC7rOvhWwWP/H/A+4AngWEk/aNpX+j1ui6RbapAt+R53RNK5hSVmMNolHUtZo324nU+1NLPN3Fby4JLWlXQMyWA+G7gcWLWkZtCZWMYKhoxSf6pWbPvThfTeRAoivat07ZUmzZs77QJWtl2kCWnz0lx2jR8HvJGU0npVY+mjgG6nHz6R+la9qcP+oWjW9R53yo4RcI7tJUvoZu15ga8Cm2e980mtT14opFf5+aSBm49ubHu+bms2ae87gPZXbXe9XYVSy5wdSDFKp5Ba6Ux22X5cwSCEsROMKCTtBhwC3E2qXbGH7U5fpt3U/QcpfqU1XkXAX0uVn5d0u+1VW8a+nueyWInU2azxMvBr2i+pfNT2AgU063qPXwUuo32W2Xq2eyaDpo7zSdK/6dx89Le2iy3rSHoBOJxUKLKVfWwvXEDzn8AdpMDsc2y/IOkeF257EgxMxOwEQ0bSG0jN7lYD3tAYL+TZ2ZtUN+OfklYg/SAXN3aAc0jdm29s3SHpzwV1J0vawtO7RmP725IeIVXCLcXNwBG2b23dIalUT7K63uOppEJ7d7bRLZoarVS1+UBgLDM2Ai0VaF/H+VRXM1tI1d3PdJsq7vnCqQRLkDx1E4CjlXqvzSNpjhzLE9RAeHaCISPpNOB2UgDet4GPAVNt71VAa4aiXFGkqwySNgDu71AbZbztyTVMqwiSPgrc0q7OjaSP2D6zzdO6pX0H8CXgFprqvpTMAOsnJK0CPNlUR6l53+Iu3BMsXwhuSTJ83kNqkrxzSc2gPWHsBEOmkf7diAfIqbzn2964gNbjzJghtFPztsuV2W9kVTRSWBu9oq5x4T8itentQ6rfUayzfF3U9R7XhaQrbL+nYs1azif1afPRBpIWJPUKPLLuufQjYewEQ0bSNbbXkfQXYE/gMdIPVNfXqNWhvH6DgmX2NycFc95JUwNFUnGyPW1fUEi3ubfPQ026OwGlevs0gld3BbYhVaBt/CieBZzUWn6/S5p1vcf7Av+xfVLL+OdJdVGKVVGWtAnp823tjfX7Q
nqVn0/q0HyU1Iusima2tTWXbTOfB2wvV6VmkAhjJxgyee37d6SGdz8D5ge+bvv4WifWRSRNBT5g+76W8eWBP9l+ayHdynv75OOfQvoxmsiMP4q7AGNs71hAs673+FbgHbZfahmfm1R8rlihSkm/IqUjT2H6MlbJTMZaekXRufno/9ku0sw2a5xPai470TM3l93UdqkGvp3m86DtZavUDBIRoBwMGdsn5ruXAUUzDiT9gQGKrrlcb6w5mP6j38zDwJyFNKGe3j6QfvxbG28+BFyVfzBLUNd77FZDJw++qJZqfwVY09VWS67jfJqv1dABsH2VpGJp55mxtg9r0X0MOExSEYNyEMK7UBNh7ARDJl8Bb8fMGSXfLiB3RIFjzgonA9dKOpUZe+zsSNnCaHX09oHUyHV74He5Am2j7P72zJwa3i3avcfLkpZYihafaxesmmNMSnOVpLfZLlrgrok6zqe6mtlCDc1lc9HNdkaNiOrJtRHLWMGQkXQeqYLxdcBr/WZKB+Jl1/vKefOOEnEkLXpvAz7MjD12zi79Q6V6evuMBQ4j9W36d9ZdmLQksL/tewvpvpWZm1QWfY8l/Q+p3cgXSanKAGuTmlT+uFQcWNaeCrwFuJcUsyOSp6nk0lkd51O75qNnu2wz20Z/t/2zdmtz2cNsP1lAcyWSUdNqTL2ZFJh9V7c1g8EJYycYMpJutb16xZobkuJJ7iN9eS4L7GL7LxXO4Y22/1WBznLA07afykbIeFJq/5TS2ll/UdJ3RfHXWhf5x3h/YHXSVfkUUgBr0XYRkt7cbrxk6nnd51OvI+kc4EDbN7eMjwe+YXurembW30RvrKAb/FVS1V2ajwQ2t/0+2+8lVYA9qpSYpA9IulfSFZLWkjQFuFrSQzmjppTu/qRYqKtyIPh5wAeASepcCr9b2utIeqftJ4DFJO2bjYJSels03V9I0omSbpb0m9JLSrbPzefSorbfmO+X7ovVMGoWJjWI3ApYuLChU9v51GE+J1SgsaqkTVrjg5rPty4zttXQAci1qcYW0gwGITw7wWzTtDY9B7AScA/VueJvbj1+u7Eu6t1IStldmFTp90M5wPKtwK9LFTbMRtV4YF6SF2sFp+rR8wFXl/KoSfoG6UdwDuBCYF3gz8CmpBpK3y2g+VqBSEknkkoY/JTUoPJ9tj/Sbc2sdYHtzfP9A2x/r4ROB+29gN2BRqr5NsAJto8tpFf5+SSpU/8pATfZXqbbmk3aXwA+S6qSPQ7Yy/ZZeV+RgqSS7rK94uvdF5QlApSDobBljdqTJZ0E/DJvf4wUM1SKaY2ia5Kes30VgO2pOQaiFK/afl7SS8DzpG7V2H62cKLQR0k/DnOTjI5lbD8t6XDgaqDrxk4L422Py/eP0iD1lYZIc1PT7YHKjB1SLaN1bT8LIOkw4G9AEWOHes6nf5Kyv5oFnLcXa/uM7rE7sLbtZ/KS3emSxto+pmU+3eRaSbvb/mnzoKRdKfsdFQxAGDvBUFgMeGOru1/SVqQCdCVL3v8/0hXbF0hfWn8hFaQrxVOS/hdYkJSptA8wieTpaG1w2E2ul/QbYD5S4bmJOSB8Y6BkYPQrOWD1OUl3234aIP9QlkpRXiwvpQhYUJI83fVc0qCs070tmoL68/2SVmwd59M9wCZu33qkaO8xUlHIZwBs35dj/U7PsVKl3ue9gTMkNV+AjQfmInnughoIYycYCoeTUjhbmQqcQPoCLYLtF4Ef5FsV7AIcRPphbDT5O59k0O1eUHc3krfBwOmkLJqdSV2Vf1xQ9yVJ89p+jpSZBLxWkbaUsfNToNFNfSLwRuCfSkXgZmoO2kVWkHQ26cevcf81CtZuglSE82pJZ+Ttj1A2zb6O8+loYBFgJmOHlPFWksckjXNuLps9PFuSyhwUiTPMKe7vlrQRKeAd4I+2LymhF8waEbMTzDaSbulUEE3STSUqo0qaKfCvmZJxQv2EpLmzQdk6/kZgSdu31DCtIkh630D73aZbd
5f130FqEingL7ZvKKnXT0hahuSlfKzNvvVtX1nDtIIaCM9OMBTmGWBfqcqo00hXpb8B/kCKO6iEfKW2HSnN/RVSD6ef2r67oOb1pODVU0rqtNJUPbhdU84iKeg5kPVzWeck4EDgXSRP4SG2SxUzvKGxTNdmTkX6GLUE7d6Xb6/tK1H/JR+7lvNJNTUftf2QpOUkLdySan97GDr9RaSeB0PhIknfVUtko6RvkYrPdZ0ctDqB1H/rN6RA2dWAhwun7B5Kqvh6FfAyKQ7hbtL6//aldEnu/4WBSyVdI2kfSUsV1ANoNOW8E/gm8EHgQ8C3gDvzvhL8imQkrw1cCixBKmz4PPDzQpqQsswAkHRxy74zC2n+i7Q0Nznfrmu6TS6kCTWcT0rNR08lea6uAa7N90/JqfAltTul2v+2jlT7oD5iGSuYbXK66omkq/9GTMWapC/r3RqBgYXnsCMp1uAw24cX1HltyU6pI/hlttdXqtB6ecEU8OZ07A1Iht62JG/HKbaL1ClRDU05Jd1oe1w2nh+yvXTrvm5r5mPfYHut1vvttruoeQywIXAlcApwhSv4Mq7jfFJNzWyzRi2lG4LhR3h2gtnG9rO2JwCbka68f04q9LdTs6EjabVu6kpaWtIXJV0BfBzYB/hJNzXaMK1p6WEpYDRAXlop3SySrHW57T1JSwGHkZZ4SlFHU85R2XhcFpg/Lzk0KjjPVUgTZszGajU4ihggtvcipfafBnwCuEHS97MxWQkVnk+N5qOtlG5mCznVHniKllT7wrrBMCNidoIhY/se0rJOJ34JdKV4l6TLSBk7k0iZYI3YhrlKxjoAh5B+kO4AViWlviPpTcBNhTQBZuownlPCz6NsE8U6mnJ+D7g93/80cKIkA28jLaGVojnlvXGfvP2mzk8bGtmTc6mkG0jv68HkOLBSmtRzPtXVzBY6p9pvQtnSDcEwI5axguJ0cylA0n1Mv9puPnkbVZtX6IZOB+0xwArAXbafKqUzXFA9TTlHk76XXsnLheNI8ViPFtT8xkD7bXfd0MrLKFsDO5IMqt8Dv7Vduu5MLaiG5qNZdw5mTLVfl7R09wCpyWt4ePqEMHaC4qhQWfbhgKRVgP1sF6m1M1gQpe2q6gy9RqmU3ZyC3RHb1w+0vzTqYisJSc+SvDinAHfRslxm+/ftntcF3crPJ0nzAi83Ynby38wHgftsnzHgk7s/lzlJtW8etv14ldpBvcQyVjAikbQNcInt/+TthYENbRfJnpG0BnAEKfbgTFI5/+NIV4pHltDMHEEK/j6X6X3HipM9LDuQrsTPs31rLsZ2IKnkQNeDdkmB7VNI7QVg5vYCxYpUziLdbCVxGuk1rZpvzZjpvbK6TR3n03mkthh3SlqR1A7j18CWktaxfUApYUnHA8fanpILYv6NVKV6jKT9bJ9SSjsYXoRnJyiCpKVsP5LvX2V7vS4ff6bsnFKZM/nYV5OCoP8GbAF8mZT6/jXbL5TQzLrjSPEcW5DSkk8BLi6duSPp56QYnWtIBt39pADW/QsalPuQ6hj9h5SqfEYVGX2zSsnzqyrqOJ9aMhkPBsbY/mzOxrquU2HSLmlPsb1avr836YLoI0pVuc8d6Z9nMOuEsRMUQdIDtosUZMvHb9f1vGNF5y7ozWBcKfX0GVs65qBlDu8mxRtsCnzF9tmDPGUoWrcCa9ieJukNpLowK7arRFtAe3nS69yaZGQd0ij3XyfdXI6V9HHbv+q0rFTF8mRV51Pz36qkK4HDGwazClVab9JuLivwR+A02z9v3Rf0PrGMFZSitHt8sqQfkGrsGPg8ZTsKv0HSWkx/Xc8Aa+SaMMXjSXLW11qkfj4PAaXjDV6yPQ3A9guS/l6FoZP17pV0Fmm57BPAypTtjTWrdPOcblQYX2DARxWi4vPpZklHkMoWrAhckOewcEHNBk/l5deHgfVJy2mNwOWBKsAHPUZ4doIiVODZmQ/4GumqVKQv0O+Uyq6QdOkAu227SDyJp
E+RMnbeQMommVRFYKWk50iBs5De37fk7UbWW9d7kElagbTEsjUpRflU4JySy4RZ93O2fzQLjzvQ9iEl51KaOs4nSfMAe5Hq6pxs+6Y8/m7gLbZ/WVB7ZeCHpGrcRzd5dd5Pqgn2xVLawfAijJ1gtpF0LO2LrgnYxfaCFU+p55A0DbiF6R2jW7N2inTklvTmgfa7QGuO/FpvBs4Cnmbm11pkaafObMHsYdkdGEuTp932pwvpVX4+SdrM9oUd9h1m+yvd1gyCVmIZKxgKA/XwKdLfR9LRtveW9AfaGFqlfvwHmM9mwJdtb1ZIYqNCxx2QhjGT42dWI73XU3MByVJ8m+mf6fwFdYYTZwGXAxeRsoRKU8f59GNJ+9j+Y2Mg1905meRxKYak7wP32D6+ZXwfYIkwtPqH8OwERZB0hO39Chx3bdvXSXpfu/22L+u2ZtbdGDie6annhwC/IHmxvluwLsqXgSOrDITOuguS+p6NJ8XLiNT37DpgV3foEj4SkfQK8Fy7XaQlu2IeynZZhSWp43xSavtxHnCg7d/ngPfTSd67XdzSM6vL2rcBqzfiz5rGRwE3O3pj9Q3RGysoxQ4lDmr7uvz/Ze1uJTQzRwJ7AIuSvqivAn5pe+1Shk7mzcB1ktYvqNGOH5LK6a9oe1vb25Didm4BBo1vmR0kvUHSLpI+rMSXJZ0j6RhJbyyhmbnF9oJtbgtUsBR7jqQPFtZopvLzyamZ7KbAwZI+Q2rb8HfbO5c0dKbLe6b+W3mskppVwfAgPDtBESQ9aHvZAse9hQGaM5YInM26M8R1SLrb9ltKaLXRfgepiOHtpFo/r315l8oCk3SnO3SjHmjfEDUnAS+TMpUWAW4F/gC8Bxhne8tua2bdylOQJf2XdB6L9HpfIr12KO9NqvR80vTK2EuSvKEXAt8vqdmkfS2ws+07W8ZXInV5H19KOxheRMxOMNtoehfwmXZR7qqp8YP32fx/I5PjY7RfiugWC0vatmlbzdslvTu2r5f0EFP/FAAAETdJREFUVeB3JO9Kc2+wUlWF67jqfZvt1XNa8EO2G0uV50kq2Wz1tILHbovtWlLOs3bV51NzhfGbgcWbxkpXxv46cK6k7zC9NMV44ABSg9KgTwjPTjDbSLqX6Venrdhlm3JeaXv9wca6qPezAXa7YPbMYqQfhhWAPRtpu6WRNBG4GzjYTV8Skr4GrGz7EwU0X/OetfGkFcuYUmoE2umL0LYPLqHbpL8tyXtl4HIXqlCdtWo5n+pE0urAl0g9sSC1JDnc9i31zSqomjB2ghGJpBuBz9m+Im+/GziuymDPKpB0D3Ao8FNX+MeaA5RPAt5BClA2qQjdDcBuLtD1XdLjpNo6ItWCObWxC9jB9uLd1sy67WqtzAvsBixqu1hmmKTjSIX2Gj2adgTutv3Zzs8akl7l51OLRxTSufQv4Ebb/61iDq3kIOmtbFfu1QvqIYydYLbRzF2qDfzL9oMVaK9NSl1dKA89BXy6YAxLa1n/xhf2FbbvLaGZdd9k+59txpcFdrJ9eCntrPMW4G0kg2OK7bsLau0y0H7bE0tpN81hAVIBvF2BSaTMpWJF9yRNIWULOW+PIgVMr1ZIr/LzqYNXdAywBimz75Jua3aYx2hgc1KLjPeTvGgfrUI7qJ+I2QmGQrtu32OUGvxNcMF+Rjkra83sgZBz9/OCtIuxGAt8VdI3bZ/aZv+Qaf5hyhlJ25O+rJcGziihmbU+bvtXtu+WtITtK5v2zVLF4ddLszEjaf40VKYidis5/mxfUuzXROAdtv9dgfQdwHKkHmCQmq/eXEqsjvPJ9qfajefClZNIjWaLIem9wM7Ah0iNbdcHlrddMsYvGGaEZyfoOpLGAz+w/d4Cx669gWLLfMYAFxWMJ1kA2Ib0Zb0y6QdpR9vLlNBr0q0rfub/kYJHG72jngEOs31cCb2seTiwLXAC8GNX2Gld0mXAO0k/wuT7fyMH27vLRTLrOp8GmE/R6tWSHiJVi
/4JcKbt/0q61/bypTSD4Ul4doKuY3tyvjIvQa0NFFux/aSkkplLj5N+CA8iLZlZ0jYF9Rqow/12290RlA4C3g1s6FypWalf1jGSxtj+Tgld4IvAi6T3+KtNH2fxooKkbKEqqet8mglJq5De95L8DvgIKRbqVaUGs3GF34eEZyfoOpIWB/5ke+2651KaXFn5IJdrBLoPqTnmfMBvgN8CF5bMdMu6lXt2JN0BrOmWxp9KjSRvsr1ytzWHC3k5trk31pOFdCo/n9S+tcsYUt2dj9v+WyntrC9Sm4wJwAeBBUkxWX+q0osX1EsYO8Fso/aNQMeQrs73sv2HgtoTs8ZTeXsRUjBpqRTwdsUMxwCPkEreTy2h26S/AunLeidgJeAbwBm2/15Ir9H1vLnjOXl7BdvzdXruEDTvsL1Kh323216125p1I2kP4GDgeVJxv4Y3qbQxW9n5pJlbuxh4ArjT9kvd1htkLnMCW5Be++a2S1bmDoYRYewEs02b7JnGl9i1JTNYsvZMVW9LVsLVzF3ADTxRVQBty1zeToq52MGFqji3eb0z4DJdzy8GDrF9ccv4xsDXbNfSFLUkku4E3mX7XzXOoej5JGk921d1+7hDRdJ7bf+l7nkE1RDGTjDbSFrO9gM1ad9Eiu34d94eA1xm++0VzmE+UjzAzrY/VFhrYdIVOKS+QqWzzypH0mqkLuBXkKrdmhSwuz6wte0pNU6vCJLOA7atOjOoyvOpZUn0b7bfVUqrjfZoUp++pYHzbN8qaUvgQGCeUhdHwfAjApSDoXAmqegckn5ne7sKtY8E/irpdNKP4g6kTuRFyWn1HyRdCW9BCoA8vrDeCSSj6l7SMsebJZ0BfKbUMoCkXYExjborkh4mBYUL+LLtn3Rb0/aUXO12Z2C1rPUX4H9b43h6iANI5/HVNAXr2v5CCbGazqfmgPY3FDj+QJxESue/BvihpPuBdwH7u2Cl6mD4EcZOMBSav8SKxhi0YvsXkiaT+uqIdHV8Wyk9SZsxvRjZpaSeXOt0qiHSRQ4C5gSWbVSbzenDPwa+lm8l+AzJmGvwuO2lc+XZC0ipvF3H9guSLiVlDRmY2sOGDsD/AZeQusnP1J27AHWcT6NyTN2opvuvfXeUCsbOjAfWsD0tn7v/Ala0/VhBzWAYEstYwWwzUMZOxfOYj1Q7ZEKp5SRJ04DLgU86V0yWdE8FgaS3koyq51rG5weusr16+2cOWfe65mw6SQfaPiTfv9b2OwtoLgicCKxNalExCliTtKS1q+2nu61ZN5L+avvdFepVfj5Juo/pwdetFA3GrrJGVDC8Cc9OMBTWlPQ06UtsnnwfKqhPUvVyEukHeCfgIqX+QqcCowvqNZjWLp7D9jOSSl6pLNS80WTojAIWLaT5Q+A2UtuCaVlPJG/Dj4D/KaRbJ5fmjKw/MOMyVilvR+Xnk+2xs/I4SasViMtaVVKjIrWAt+TtxnfUGl3WC4Yp4dkJRhRtlpN+Cxw7q1+oXZrD+nkO25E8EGfYPqGQ1k3AhrS/Kr7U9pqFdI8DnrR9UMv4d4A32v5MAc07ba/0eveNZCS166tWzNtR1/k0K5TwutSRVRgMT8LYCUYUdS0ndZjLKGAzkifiU3msq1endS0B5KXBE0nZUDfl4TWByaSu510vxibpLtsrdtjXk8ZOOyTNVTDw/D5qWlIajJKlI2ZBu9IssaB6YhkrGGnUtZw0E3mp5fx8a/BLcoZalzTGzsrjum1k5fpBE3LxuUYH7tvc0vW8y7pXSvo6cLCbrsIkfQ0YdnVauklTld+dga2AxUvo1LykNBh1XnlXnSUWVEx4doIRS5XLSa9jTrVcndYVeNlN3RygfBLJWLyR9OO3FnADKUC5F2sLrUsycLYhVeT+LHC2q+m4PtC8Kj+fak5yiMDlHmdU3RMIgtnF9pW2P0cqGHY0qX4G8FqBulqmVZNuyWaklejaftr29sDmwM+BX5BK+n+02dCp8bPtGpK+m6snH0JKO
18L+KftiXUbOpk6zqdKW0cE/UUsYwUjniqWk0YAdRlZXdfNS2V3D/CQXvhs9wDuINUrOifXFxpObvauzSUHCT/VMFglbUQqang/8KNGfJLt9bqlORvUdbEQVER4doJepbIvL0lLNW3G1Wl5euGHaQngu8CHgbsk/ZJUvqEXL0AnkbqsI2kccBrwACng/biSwpIumMWHfqLkPIL66cU/rCCAaj0dVwHLQbVXp5KWsv1I3qzMyKpLt4nh5AGZLWy/CpwLnJsr+24JzAs8LOli2zvXOsHufq7zNJ0vHwdOtn1kzma8sYs67XjTrDzI9q2F5xHUTBg7QTB06vI01GJk1ajbk+R2GKcDp+fWDduW0qppSan572NjUj8wcguH0n87C0nq+H7a/n1h/WCYEMZO0DPU6HGIoOTSgvV7k4ogaW5SNuFYqvk+nkTK/PpP05LS95i+pLRbAc1LJE0CHgUWIfUCQ9KSQOm+ZwuRvGZt6woBYez0CWHsBL1EMY+DpGNpb9QIWLibWq+DnglKngV61Zt0FvAfUv+vFwd5bDeoY0lpb2BHYEngPbZfzuMrkdLtS3K/7U8X1ghGAGHsBL1ESY/D5NncNyTqMrKGoXHXC0HJ7VjG9haDP6xrVL6klAtEngopQFnSXsAOwL2kkhElWUXS+ravbB6UtAHwSGuRzKB3CWMn6CWKeRxsT+y0T9IRpXSpyciqUbcTIz4ouQN/lfR227dUpFf5kpKklUlVzycAT5D62cn2RiX0Wrga+G+b8edJhtZWFcwhGAaEsROMKIahxwHSVep+JQ5cl5FVh+4w/WxL8x7gk7kh6IuU78Zdx5LS7aR+dlvZvgtA0j6FtFpZzPbNrYO2J0saW9EcgmFAGDvBSGO4eRygviWWYkZWTbrD8bMtzQeqFKtpSWk7kmfnUknnZf2q/mYG6nk1T0VzCIYBYewEI4q6PB2SOl31ij7KiiqpW+NSYW3Yvh9A0mJU0IyyjiUl22cAZ0iaj5Tmvg+wuKSfkPrZzWrhv9nhWkm72/5p86CkXUlB4UGfEI1Ag55B0gO2lyt07HtJSyxtU1htr1BIdyAj6ybby/SS7gDzKfbZ1omkDwNHAksBjwNvBqbaLtL/S9I00pLSrk1LSveUOn8HmMcYYHtgR9sbF9RZHDiDVK6gYdyMB+YCtrH9WCntYHgRxk7QM0h60Paydc+jm9RoZNWiO8B8eu6zBZB0Eykr6iLba+UifxNs71FIbxuSZ+fdQGNJ6UTby5fQGy7k93X1vDnF9iV1zieonjB2ghFFjZ6O1saTBv5l+8ESev3IcPMmVYGkybbHZ6NnrZwCfo3tdQrrNpaUJpCMrYmUX1IKgtoIYycYUdTo6bi0zfAYkjt8gu0iBdnqMrLq0B1u3qQqkHQRyeg4FFiUtJT1TtvvrnAOlSwpBUGdhLETBENA0njgB7bfW+j4dRlZtej2G9nD8jwwCvgYqb3Br20/UevEgqDHCGMnGFEMx+UkSdfbbp1Xac2iRlYdusPxs62C3JxzJdsXSZoXGG27XSG8IAhmk0g9D0YaR7YZGyOpFo9Dzvao/IohF0Wbv8d0h9VnWwWSdgf2IHnN3gIsDRwPbFLnvIKg1whjJxhRdKoHkj0OPwRKLSe1q+47hpTVslcJzUHmU4uRVVK3rs+2Zj4LrENqa4DtO3PNnSAIukgYO0FPUIGno7WCr0lF2fa1/Xgp0bqMrOFk3NXlxaqIF22/1OjBKWkOercPWBDURhg7QU9QgafjUtsPFDx+J2oxsmrUnYm6vFgVcZmkA4F5JG0G7An8oeY5BUHPEQHKwYhiMI+D7SI/FM1ByJJ+Z3u7EjptdJerw8iqQ7euz7ZOJI0CdgU2J6Xcn08q8hdfzEHQRcLYCUYUknZpGWp4HK4tvJx0g+21Wu+XpkYjq3Lduj7bIAh6n1jGCkYadS0nucP90jQX2KuyqF4dunV9tpUj6eaB9tteo6q5BEE/EMZOMNI4E6jc0wGsKelpk
hEwT75P3rbtBQvp1mVk1aFb12dbB9NI7+tvSDE6z9c7nSDobcLYCUYatXg6bI+uSquFuoysOnTr8mJVju1xklYl9ab6DXBb/v8C26/UOrkg6EHC2AlGGnV5OmqhLiOrJt1++2xvB74BfEPSjsAvgMOAw2udWBD0IBGgHIwoJL0KPEv2OADPNXZR1tMRFKbfPltJSwM7AdsA/wYmkTqPP1PrxIKgBwljJwiCoGIkXQYsQDJwTgeebN5v+8l2zwuCYPYIYycIgqBiJN3H9KW65i/hhherp2OWgqBqwtgJgiAYpkhazfaUuucRBCOdUXVPIAiCIOjIL+ueQBD0AmHsBEEQDF80+EOCIBiMMHaCIAiGLxFnEARdIIydIAiCIAh6mjB2giAIhi8v1T2BIOgFIhsrCIKgYiS9GXjK9n/y9kbAR4D7gR/ZDiMnCLpIeHaCIAiqZxIwH4CkccBpwAPAmsBxNc4rCHqS6I0VBEFQPfPYfiTf/zhwsu0jJY0CbqxxXkHQk4RnJwiCoHqaU8o3Bi4GsD2NSDcPgq4Tnp0gCILquUTSJOBRYBHgEgBJSwIv1DmxIOhFwtgJgiConr2BHYElgffYfjmPrwSMqW1WQdCjRDZWEARBjeQA5Z2BHYB7gd/bPrbeWQVBbxGenSAIgoqRtDKwEzABeAL4Lenic6NaJxYEPUp4doIgCCpG0jTgcmBX23flsXtsr1DvzIKgN4lsrCAIgurZDngMuFTSTyVtQmRhBUExwrMTBEFQE5LmI1VOnkBKQZ8InGH7glonFgQ9Rhg7QRAEwwBJY4DtgR1tb1z3fIKglwhjJwiCIAiCniZidoIgCIIg6GnC2AmCIAiCoKcJYycIgiAIgp4mjJ0gCIIgCHqaMHaCIAiCIOhp/j8CrAVuH10W/gAAAABJRU5ErkJggg==\n"},"metadata":{"needs_background":"light"}}]},{"metadata":{},"cell_type":"markdown","source":"#### also checked the corelation in regards to the class since am trying to build a ML agorithm for that class"},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndata.corr(method='pearson')['CLASS']","execution_count":14,"outputs":[{"output_type":"execute_result","execution_count":14,"data":{"text/plain":"FULL_Charge 0.534602\nFULL_AcidicMolPerc -0.598816\nFULL_AURR980107 -0.584111\nFULL_DAYM780201 -0.554838\nFULL_GEOR030101 -0.260470\nFULL_OOBM850104 -0.453287\nNT_EFC195 0.260702\nAS_MeanAmphiMoment 0.693552\nAS_DAYM780201 -0.437168\nAS_FUKS010112 0.033432\nCT_RACS820104 0.267652\nCLASS 1.000000\nName: CLASS, dtype: float64"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":"#### Most of my variables are positively skewed"},{"metadata":{"trusted":true},"cell_type":"code","source":" 
data.skew().plot(kind='bar')","execution_count":15,"outputs":[{"output_type":"execute_result","execution_count":15,"data":{"text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAFoCAYAAACsQLuwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8JFV9/vHPw6YIKiCDIjAMKmrEqMiIuxFcgkZERASMBg1KFlc0iWAUfnEnijEqSjASkSiIKIgK4gJxDciwyCIiiCCIBhTZREXk+f1xqmd6evrOvTNzqvp28bxfr/ua7uq+9a3bU/2tqlPnfI9sExER/bLWpDcgIiLqS3KPiOihJPeIiB5Kco+I6KEk94iIHkpyj4jooST3iIgeSnKPiOihJPeIiB5aZ7Y3SLo78E3gbs37T7B9yMh77gZ8AtgB+BWwl+0rV7beTTfd1IsWLVq9rY6IuIs655xzfml7wWzvmzW5A78HdrZ9q6R1gW9LOtX2mUPv2Q/4te0HSdobOBTYa2UrXbRoEUuWLJlD+IiIGJB01VzeN2uzjItbm6frNj+jBWl2A45uHp8APE2S5ritERFR2Zza3CWtLel84Drgq7bPGnnLFsDVALbvAG4C7lNzQyMiYu7mlNxt/9H2o4AtgR0lPXzkLePO0lcoNylpf0lLJC25/vrrV31rIyJiTlapt4ztG4H/AXYZeekaYCsASesA9wZuGPP7R9pebHvxggWz3g+IiIjVNGtyl7RA0kbN4/WBpwM/HHnbycC+zeMXAKc7heIjIiZmLr1lNgeOlrQ25WBwvO0vSnorsMT2ycDHgGMkXU45Y9+7tS2OiIhZzZrcbV8AbD9m+cFDj38H7Fl30yIiYnVlhGpERA/NpVkmIoYsOvBLq/V7V777LypvScTMcuYeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UOzJndJW0k6Q9Ilki6W9Nox73mqpJsknd/8HNzO5kZExFysM4f33AG8wfa5ku4JnCPpq7Z/MPK+b9l+Tv1NjIiIVTXrmbvtn9s+t3l8C3AJsEXbGxYREatvldrcJS0CtgfOGvPy4yV9X9KpkrarsG0REbGa5tIsA4CkDYHPAq+zffPIy+cCW9u+VdKzgZOAbcesY39gf4CFCxeu9kZHRMTKzenMXdK6lMT+SdufG33d9s22b20enwKsK2nTMe870vZi24sXLFiwhpseEREzmUtvGQEfAy6x/b4Z3nO/5n1I2rFZ769qbmhERMzdXJplngi8BLhQ0vnNsjcBCwFsHwG8APg7SXcAvwX2tu0WtjciIuZg1uRu+9uAZnnPh4AP1dqoiIhYMxmhGhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UOzJndJW0k6Q9Ilki6W9Nox75GkD0i6XNIFkh7dzuZGRMRcrDOH99wBvMH2uZLuCZwj6au2fzD0nmcB2zY/jwU+0vwbERETMOuZu+2f2z63eXwLcAmwxcjbdgM+4eJMYCNJm1ff2oiImJNVanOXtAjYHjhr5KUtgKuHnl/DigcAJO0vaYmkJddff/2qbWlERMzZnJO7pA2BzwKvs33z6MtjfsUrLLCPtL3Y9uIFCxas2pZGRMSczSm5S1qXktg/aftzY95yDbDV0PMtgWvXfPMiImJ1zKW3jICPAZfYft8MbzsZ+Kum18zjgJts/7zidkZExCqYS2+ZJwIvAS6Ud
H6z7E3AQgDbRwCnAM8GLgduA15Wf1MjImKuZk3utr/N+Db14fcYeGWtjYqIiDWTEaoRET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ7Mmd0lHSbpO0kUzvP5USTdJOr/5Obj+ZkZExKpYZw7v+TjwIeATK3nPt2w/p8oWRUTEGpv1zN32N4EbOtiWiIiopFab++MlfV/SqZK2q7TOiIhYTXNplpnNucDWtm+V9GzgJGDbcW+UtD+wP8DChQsrhI6IiHHW+Mzd9s22b20enwKsK2nTGd57pO3FthcvWLBgTUNHRMQM1ji5S7qfJDWPd2zW+as1XW9ERKy+WZtlJB0LPBXYVNI1wCHAugC2jwBeAPydpDuA3wJ723ZrWxwREbOaNbnb3meW1z9E6SoZERHzREaoRkT0UJJ7REQPJblHRPRQkntERA8luUdE9FCSe0REDyW5R0T0UJJ7REQPJblHRPRQkntERA8luUdE9FCSe0REDyW5R0T0UJJ7REQPJblHRPRQkntERA8luUdE9FCSe0REDyW5R0T0UJJ7REQPJblHRPRQkntERA8luUdE9NCsyV3SUZKuk3TRDK9L0gckXS7pAkmPrr+ZERGxKuZy5v5xYJeVvP4sYNvmZ3/gI2u+WRERsSZmTe62vwncsJK37AZ8wsWZwEaSNq+1gRERsepqtLlvAVw99PyaZllEREzIOhXWoTHLPPaN0v6UphsWLlxYIXQdiw780mr93pXv/ovKWxIRbborfddrnLlfA2w19HxL4Npxb7R9pO3FthcvWLCgQuiIiBinRnI/GfirptfM44CbbP+8wnojImI1zdosI+lY4KnAppKuAQ4B1gWwfQRwCvBs4HLgNuBlbW1sRETMzazJ3fY+s7xu4JXVtigiItZYRqhGRPRQkntERA/V6AoZsZy7UneziPkqZ+4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9lOQeEdFDSe4RET2U5B4R0UNJ7hERPZTkHhHRQ0nuERE9NC9nYspMPhERayZn7hERPZTkHhHRQ3NK7pJ2kXSppMslHTjm9ZdKul7S+c3Py+tvakREzNWsbe6S1gYOB54BXAOcLelk2z8Yeeunbb+qhW2MiIhVNJcz9x2By21fYft24Dhgt3Y3KyIi1sRckvsWwNVDz69plo3aQ9IFkk6QtNW4FUnaX9ISSUuuv/761djciIiYi7l0hdSYZR55/gXgWNu/l/S3wNHAziv8kn0kcCTA4sWLR9dxl5GunhHRtrmcuV8DDJ+JbwlcO/wG27+y/fvm6UeBHepsXkRErI65JPezgW0lbSNpPWBv4OThN0jafOjpc4FL6m1iRESsqlmbZWzfIelVwGnA2sBRti+W9FZgie2TgddIei5wB3AD8NIWtzkiImYxp/IDtk8BThlZdvDQ44OAg+puWkRErK6MUI2I6KEk94iIHpqXVSEjoki32VhdOXOPiOihJPeIiB5Kco+I6KEk94iIHkpyj4jooST3iIgeSnKPiOihJPeIiB5Kco+I6KEk94iIHkpyj4jooST3iIgeSnKPiOihJPeIiB5Kco+I6KEk94iIHkpyj4jooST3iIgeSnKPiOihOSV3SbtIulTS5ZIOHPP63SR9unn9LEmLam9oRETM3azJXdLawOHAs4CHAftIetjI2/YDfm37QcC/AYfW3tCIiJi7uZy57whcbvsK27cDxwG7jbxnN+Do5vEJwNMkqd5mRkTEqphLct8CuHro+TXNsrHvsX0HcBNwnxobGBERq062V/4GaU/gz22/vHn+EmBH268ees/FzXuuaZ7/uHnPr0bWtT+wP8DChQt3u
Oqqq2r+LTGDRQd+abV+78p3/0XlLWlH3/++LnX9Web/btVJOsf24tneN5cz92uArYaebwlcO9N7JK0D3Bu4YXRFto+0vdj24gULFswhdERErI65JPezgW0lbSNpPWBv4OSR95wM7Ns8fgFwume7JIiIiNasM9sbbN8h6VXAacDawFG2L5b0VmCJ7ZOBjwHHSLqccsa+d5sbHRERKzdrcgewfQpwysiyg4ce/w7Ys+6mRUTE6ppTco+Iu4a78o3Kvkn5gYiIHsqZe0RMTK4U2pMz94iIHkpyj4jooST3iIgeSnKPiOihJPeIiB5Kb5mYeulxEbGiJPe7gCS/iLueNMtERPRQkntERA8luUdE9FCSe0REDyW5R0T0UJJ7REQPJblHRPRQkntERA8luUdE9JBsTyawdD1w1Wr86qbALytvTuIl3jTG6/Pflngz29r2gtneNLHkvrokLbG9OPES764er89/W+KtuTTLRET0UJJ7REQPTWNyPzLxEi/xOo+VeFMWb+ra3CMiYnbTeOYeERGzSHKPiOihJPeIiB6amuSu4sWSDm6eL5S046S3a01I2mjS2zAg6RmT3oY2SHrupLehJknHzGVZy9vQy32lS5LWlbS9pM3aijE1yR34MPB4YJ/m+S3A4W0Fk/TO4eQraWNJb68c5peSviZpv3mQ6D/WZbDBQbryOp8/8rMHcOTgeQvxNh15/mJJH5C0vyTVjtfYbiTm2sAOLcWaSfV9RdKfSjpT0tWSjpS08dBr36sdb4Zt+FGL6z5C0nbN43sD3wc+AZwnaZ+V/vLqxpyW3jKSzrX9aEnn2d6+WfZ9249sKd7SOKPbUDHGhcBBlAPWLsC3gWOBz9v+ba04Q/FOnuklYGfbG9SOuZJt+anthZXXeQfwZeA6yt8E8ALgBMC2/7pyvKX7g6Q3A08GPgU8B7jG9gEVYx0EvAlYH7htsBi4HTjS9kG1YjXxOt1XJH0beDtwJvBy4GXAc23/eNx3sUK8W4BB8hvsK/egfLa2fa/K8S62PUjurwOeavt5ku4HnFr77wNYp/YKW/SH5izFAJIWAHe2GG9tSXez/fsm3vrA3SrH+IPtLwJfbNa/K7A3cLik02y/qHK8JwMvBm4dWS6gehOXpJtneomSpGp7PPBu4GzgCNuW9FTbL2shFixLCgDPB55s+zeSPgWcWzOQ7XcB75L0rtqJfAad7ivAhra/3Dx+r6RzgC9LegnLknBNHwfuDfyj7f8DkPQT29u0EAvKQXjgGcBnAGz/oq2LvGlK7h8ATgQ2k/QOyhnZm1uM99/A1yX9F2Xn+mvg6Moxlv6vNmfqxwPHN5dtz6scC8pZ0W22v7HChkiXthDvRuAxgy/PSLyrawezfXbTHvxq4HRJb6SdxDCwvqTtKc2ba9v+TbMdf5D0xzYC2j5I0hbA1gx9f21/s3KorvcVSbq37ZsAbJ/RNKt9FtikdjDbr5a0A3CspJOAD9HuvnKjpOcAPwOeCOwHIGkd2jnRmZ5mGQBJDwWeRkmKX7d9ScvxdgGe3sT7iu3TKq//H2y/t+Y655PmHsXJtldoM5V0qO03thh7C+DfgMW2H9BSjDNGFr3I9s8l3Qc4rY2iUJLeTbm6+wEwOIDY9lTfOJb0IuAK22eOLF8IvMX2K1qKuxbwKmBP4IG2799SnAdTTlDvB7zf9seb5X8OPNP2G6rHnJbkLmnc0fsW239oIdbalC/n02uvez6QdF9gC8qZyrXjzqxj9TX7z91s3zbrm1d93ZcCjxg0F7btrrKvSNoc2N72KZPellqmqVnmXGAr4NeUM+mNgJ9Lug54he1zagWy/UdJtw1fJrahaX45iNIEM6jPfB3weeDdtm+sHG974COUtsafNYu3lHQj8Pe2q7YTNzHvTblZvDRBUA6cVf+2oVidfZ5DcRdT9s07gMts/5BlNz1ruwJYF2g1uU9iX1nJthxpe/8W1vtQYDeG9s2m3b16i4CkVwD/Y/uypifVUcAew
JXAvrbPqx1zmpL7l4ETB00jkp5JSRrHU7pJPrZyvN8BF0r6KvCbwULbr6kY43jgdMqd818ANHfP96XccKndn/i/gL+xfdbwQkmPa16r2vNI0l8BhwBfYVmC2Al4p6R/sf2JmvHo+POU9GfAYZR7CzsA3wE2lvQH4CW2q99XoBw0zpf0dYYSfOX9ErrfV2ZqVxfw7JqxmnhvpPRSOw4YNBtuSWmDP872uyuHfC3lJi5N3EcA2wDbU5prnlw53lQ1y6xQ2H6wTNL5th9VOd6+45bbrnZTVdKlth+yqq+tQbzLbG87w2uX235Q5XiXAo8dPWNu+jCfZfvBteN1/HmeR2kvvV7SNsD7bO/e3NT9R9vPrBmvidn6ftnE6Xpf+SNlZrbhriNunm9he73K8X4EbDfarCtpPeDimf72NYi3NEc1vanOsv3vzfOqXawHpunM/YbmaHtc83wv4NdN+2b1LpG2j266Jy603UbvAICrJP0TcPRQd6z7Ai8F2jjrO1XSlyiDJwbr3wr4K8qVUW1ifA+EO1n+S1xL15/n2ravbx7/lNKDBdtflfT+FuJ1tV9C9/vKFcDTbP909IU2elZR9sH7s+JUn5vTThfrO5t2/V9TOoW8Y+i1VnrLTFNyfxHlEv+k5vm3m2VrAy+sHUzSrsB7gfWAbSQ9Cnhr5V4JewEHAt/QsmHI/wecTAt/k+3XSHoWy9oZBVwDHN7SjaR3AOdK+grLEsRCSvPI21qI1+nnCSyR9DHg65TP9H8AJN2Dsl9W19F+OYl95f3AxpSD5Kh/bSHe6yhdnS9j+X3zQZTeM7UdDCyh7Bcn274YljbtXdFCvOlolmnOzt9t+x87jHkOsDPlJshgROyFtv+0q23og6YJ5s9ZPkGcZvvXE92wCiStC7wCeBhlOPlRzc349YHNbK/OBPCzxcx+WUnTDXJHlt83z7bdyhiFpk/7PYf3/cGJgO1basebijP35gvTdf2MO2zfpOVHj3V2JJT0Mtv/VXmda1OGdm9JGfL83aHX3my7du0cmh35uFnf2LI2Ps+mvfbDY5b/lhUv92vpZL+cxL6ykm15hu2v1l6v7Tspg7VG421oe3Rkbo14d1CaZWh6zOxEaX3YFbhv7XjTVDjsPEknS3qJhopDtRjvIpWBFWtL2lbSB4HvzvZLFf1LC+v8D+DPgF8BH5T0vqHX2vwsV6BSV6dLbXyeM5J0akur7mq/nDf7Ch0XtaMMEGuFpMdK+nfKwf9k4FvAQ1uJNQ3NMgAqZQBG2ZWLQQ3Fuwfwz8Cgx8NpwNtt/65ijAtmegl4sO2qtWwkXWD7Ec3jdShnnZtSumad6frFmWZKAqLUflkww+urG6/rz3OmHg4Cvmh785rxmpjD+6Uo++Xbau6XTZyu95WuC5W9fiXx/tl21ZIHKiVTXki5p3AspZTKErdXy2Z6knuXVIqSbQ1c3tbAlybO/1Hao0fbnwV8t/ZQaEk/tP3QkWUHN9uwWQvdv/4AfJLxzQYvsH3PyvG6/jz/CHyD8T1/Hme7lV4QXZjAvvJrZi5U9mnbVZstJP0OeA9l4NmoA2xXLcEt6XrgUsqN4y/a/p2kK9xSaQyYkjZ3AEl3pxTb2Q64+2B57TN3SS8H3gn8mNIbYX/bM51VrKkvUqrhnT9mO/6nhXhLJO3iZdX3sP1WSddSRiPWdgHwXtsXjb4gqY3SDl1/npdQBvpcNiZeG933BqNh3wQsYvnCYY+oHKrrfaXrQmXnAid5zMj2JgfUdj/K1dY+wPtV6hKtL2mdpi2+uqk5c5f0GeCHlBsQbwX+ErjE9msrx7kI2KkZmPIA4JO2H18zxl2FpCcDV83Qd3mx7SUT2KxqJL0AuHBcf3NJz7N90phfW9OYlwL/CFzIUH/sNnrm9JmkhwA3DI1TGH7tvm6xhk5zovocSqJ/EqUIYu3y3lOV3M+zvf2gLbDphnaa7Z0rx1lutNjo89qau+aD7liD2ivfc0v/MRpTT4PS77bVCptd6
frz7Jqkb9t+UkexOt9XdBcpVDYg6V6U2liHVV/3tOzzkr5ne0dJ3wT+HvgF5Utbtc1KpRDZcNe9vYefu2IND5X6OB8GLmOoOBNlIMXf2/5KrVhNvOF6GtcMxdsbqF5Po7kRtx+wO2U04CBBfB742OjQ7wrxuv48Xw/cZPtjI8tfTem7XH2UqqSnUf4PR2vLfK5ynK73lbGFyih1e6oXKtOEiszNsC3VZyWD6UruL6cU7n8EpXDRhsDBto+oHGds7Y4B160tcwnwLNtXjizfBjjF9p/UitWst+t6GsdSvpxHs3yC2BfYxPZeleN1/XleBDza9u0jy+9GGQxTux0cSf9N6Tp3McuaZar3GpvAvnI+Mxcq+w9Xnk5T0mmUInNHe8Uic0+33dkk4JKutr1V7fVOzQ1V2//ZPPwG0Nod5prJew7WYVnSG/YzSlnX2rqup/For1is6xrgTLUzGXHXn6dHE3uz8PdN81AbHuluRqN2va9sMJrYAWyfKamNuX0X2T50JNYvgEMltdK9eiVaOcOemuTenA3twYq9BN5aOc4XWMmH7bo1PI4CzpZ0HMvXt9iLdgZudF1P49eS9gQ+24wGHAz53pMVuyvWMO7z3IrSlNDKQJhxN9+aduO2nCnpYbZbG2jT6Hpf6bpQ2VXqsMicyqC9cXlFtDA6FaarWebLwE3AOSybXozaNyJUCvnMaFxXrTWM9zDguSxf3+Lktr686rCehqRFwKGUWijDk6ycDhxo+yctxPwTVix21crnqVKv/jXAG1g2IfYOlEJXh7dxFdg0PT0Q+AmlzV2UK4g2moC6rr0yrlDZyW6hUJlKzaMDm3ijReYOtX1D5XjbUpL46IFja8qN48trxoPpSu4X2X54xzHXAwY1xy+tfQNwhpib2v5li+tfCNxs+8Ym+S6mdCm9uK2YTdz7UPa31v62SWgS0oHAwylnZhdTbsi1Un5A0tbjlrfRFXJS+0ofSfoi8CbbF4wsXwwcYnvX2jGnqbbMdyV1VvlO0lMpvS4Op/TA+JGkp1SO8SxJP5H0bUnbS7oYOEvSNU2viKokHUi5Z3Fmc4P6y8CzgOM183DsNY25o6TH2P4VsJmk1zcJsY1Yuww9vrek/5R0gaRPtdVUYvtU239m+z62N20et1VXZpDEN6IUm9oV2KilxN75vrKSbTmypfU+VNLTRtv0h/ejihaNJnaAZqzHohbige15/UMZrHEBpZjPHyhDeC8YLG8x7jnAQ4aePxg4p3KM84E/AR5PKdD0uGb5nwDntvA3XUyZGOA+wC3Agmb5BsBFLcQ7hDLycAnwLkpzzMHANyn1O2rHO3fo8X8Cb6dc9h5AGY1YO95Xhh4f1MZ+OCbma4GLKAP53tp8D17dg31lkxl+7gNc00K81zS55CTKPKa7jduPKsa7fHVeW5Ofabih+pwJxV3XQyMPbf+oGThV051uBoRIus32mU2sS5r2ztr+aPu3km4Hfks5oGD7Ny117ngB8CjgbpRxCVvavlnSe4CzWH42mtoWe9nUi/82WxfX1TRc+GxPygGsbftRpi78DYCkQ4H/BT5YOU7X+8r1zDzN3mZjf2PNvALYwfatTZPTCZIWuUx918YfeLakV9j+6PBCSftRTiSrm4bkvhmwqUcudVVmpLmW9upmD2bZOaZ5/pfU/0+4UdLfAPei9Cw5gDLJ89NZsYBSDeeqzN+4AWUQzNHNjeqdaafM6R0uN99uk/Rj2zdDqXcuqY3udJs1TQYC7iVJbk6NaKcJchI3rMRQh4LmcRvJqOt9petp9tZ2U7Pd9pVNM+wJzT2NNj7P1wEnShrOI4spM2rt3kK8qUju76F0Txp1CXAkZWdrw98Br6RcvonSlLDCxAxraF/gzZQkMSgqdBrlgPWKyrGgTL6wZxPvBEpPiBdRLk8PbyHe7ZLuYfs2Si8SYOnowDaS+0eBQaXJoyklaq9vBqesUEysggeolKrV0OOlXHnqu8Z/Ue7LnNg8fx7tdPPse
l/pepq9X0h6lJsic80Z/HMo3Wmr39tz6W75BEk7UW6+A3zJ9um1Yw3M+94yWskUYpK+78oj16IeSXez/fsxyzcFNrfd9YQdVXXdbXYo7qMpBacEfNP2eW3E6TNJW1KuLH8x5rUn2v7OBDarqmlI7pfbftCqvrYG8Waa8AGoX1q1OZLvQRmwcQelh85Hbf+4Zpwm1rnA54Bj21j/DDE7K+QlaRPKAJtrKWezb6LcrL4EeKcrz9sq6V6DpqYxry0c18SwBrFWOnmE6/fLnsS+0mmhshm6ev7QY0pUT6Np6Ar5NUnv0MhdHEn/Qul9UdudlHbMYygzp+w68lONpHdTRuCdSekJdAWljvwJKiM7a9uY0o3uDEnfk3SApKoTWAxTKeR1GfD/gGcDf0GZ7u6y5rXa/pvSRrwDcAalhvahlBuCH28h3v8MHkj6+shrtcv9/pLStLSk+Tln6KeN0sld7ytvpBQpE/A94Ozm8bFNt8za8Wbq6vnprrt6tqaNLjiVuxBtQJmW6seUwmGfBS6n7AgbthTzoZQkdC4lYTwbWKeFOBcOPV4H+E7zeGPa6W423FXwyZR7CL+gJML9W4h3CaV/7+jybSiDYWrHO7/5V8DPxr1WOd554x6Pe14h1r8D32/+z55Mc9Xd1s8E9pUfUXqojS5fD7ishXiddvWcxM+8P3O3/Rvb+wDPoJx9fRx4pu29PTRDuaTtKsb8oe1DXOq4f4FS7+KAWusfcufQ5fb9gbWb+IOh+q2x/S3bf0+5BD6U0nxRW9eFvNZSGVa+FbBhc6k9GB27XgvxPMPjcc/XLFCZlOZRwGeAl1AmjP9XlYqXrepoXxkUKhvVVqGyP9r+LaVq6XJdPVuINRHT0FsGANtXUJotZnIMUGVSDUlbUIpN7U6piXIAZULb2t5J+ZJeSrla+Lsm/gLKWVptK1RidOmq+GXaKc7UdSGvd1Fm6wL4a+A/JRl4GOVKrLbhrpebDV3Oi+X7wFfhcmp5hqTzKJ/h22ju0dSORff7SteFymbq6vk02unq2bl5f0N1rtTM1FRhPd+gdKc7ntIFbLkbVa5/42oTSgnjVifjnhR1WMiribc2Zb++Q2WykEdRmmh+3kKsQ1b2uu1qB5RmiPxulIo8fcObAAAWOklEQVShCyg3Oz9tu5W5WidB3Ra1W4flu3o+ltIV+aeUom9Tfwbfp+ReZTo8SVey7JJ6+MMZVN9rrZb80DY8BPgH21X7us92o8j2+2rGm2Vbqnc3a7oIzsiVZ/OZK0kH2V6j0auSfkM5Sz+Wcs9puS+u68/E1Om+IukewB/cFOdrvgPPBq603cZV82j8dSn9z39m+7q243UhyX2CJD0CeC+lrfEkyhDyD1POIg6z/W+V491J6XFxKsvKxS5V80yzibc2pcfRFsCXbV/UDBR5E7B+jSutkXh3Um6UDSY9Xm4ouyvPtztXNfZNSR9n5nZ8u/5MTF3vK98E9rN9maQHUXrMfJLSpPY92wdVjncE8EHbFzeD6v6X0ktuE8qJ1bE1403CVCd3Sfe3fW3z+Ezbj6u47t2B023f1DzfCHiqK85oL+ksyryR/wvsAvwT8CngLbZ/VyvOULxHUdpqd6F0oTuWMvN6W5Nxf5zSxv49ygHrKsrNuANrfo5D8Q6gjBm4idKb6sThm+6TUqvJsEsT2FeWDlaU9DbKNIyvVCm7fY4rzz4l6WLb2zWPX0f5bj+vGc186rT9f40z7cm9lYllm3Wf72WFpwbLqn5JR2Oo1NBY1EYb45jYT6C0MT4deKPtk2f5ldWJcRHwCNt3Sro7pa/2gzxmVGDluNtQ/rbdKAeUd7oZZj4Jlc7cX2z7v2dqLmmzSa2jfeUCNwMEJX0HeM/gBKCNkejD32WVGaA+Y/vjo69Ns6npLTODNrsLjusmWvvzurvKrO+Dv+NW4BGDAVtttRE3vXG2p9TQuIYy63sbbnczvZ7t30n6UduJvYn1E0mfp/RjfgmlXPPEkjt19tNBz
fF7rvRdlXW4r1wg6b2UbrIPAr7SxN+opXg3Nk2EPwOeSKm2ObjRun5LMTuVM/eZ130UpQ/s4ZS2zlcDG9t+acUYZ6zk5eptxJJeRultcXdKD4Hj27x5JOk2ys0/KAnugc3zVqaGk/QASlPCbpTudMcBX2yjiauJ9yrbH5rD+95k+51tbENbJrCvrE+pVb85cJTt7zfLnwA80PYxK/v91Yj3YOADlFHM7x86a/9zyjiaN9SMNwnzPrlL+iAzTyy7r+17tRR3A+AtlEtRUc4k3j7NXaSam2QXsqzy3miPi6pVDDXDlHBD8aqWa27+vguAzwM3s+LfV7uHR+c38Zsz6Vew4kTxbdxQ7XJfeYbtr87w2qG231gz3l3BNDTLrKxuRhs1NYClI9Wq17SYC0nPAP7J9jMqr3qnyutbqUHybtrAt6MkiEuaAWlteCvLktCGLcWYtM8D3wK+xvJ13WvrdF8BDpd0gO0vDRY0/d6PopxdVyXpX4ErbB8xsvwA4H59OJjM+zP3lZH0Xtv/UHmd77f9OklfYMwVQ80zFkk7A0ewrCvkOymlDgS8o4W+y/9E6WLZ+g3bJt69KNPdLaa0eQt4JKX3xX6eoaLitJB0B3DbuJcozU7VryrH3ehvwwT2lUWUka9vsv255gb8CZQrsH1deXJ6ST8AHj64JzS0fC3K9J0PH/+b02MaztxX5oVA1eTOspmX3lt5veMcBuxP6Qr5LEp1yLe4TPXVhq2BcyS9svYAohl8gDKUe+/Bl6i5WfwW4EOUipjVNAlhL0rJiC8A/wg8hVJ07m22f1kzHqXwW9e9Kr4o6dm2T2k5Tqf7istsSE8HTpO0GeVG+Fm226rQ6NHE3iy8c9ChYdpN+5n71ba3mvR2rK7RNluVqege2HLMR1MGS/2Q0sd+6Q5eu3eOpMtsb7uqr61BvOMppZM3oKmsSUnyTwIeZbvqfLxddpmTdAvL5hTdALid8rdCe1cJXe4rg+/B5pSr168yNANTC/HOBl5k+7KR5dtSatgvrhlvEub9mbtmnqRAtNAVUtKFrKSiX+UeHhtJev7y4Zc9r90s06zzXEn/TCmd/ECWL7VQewRn12dAD7P98KY72zW2BzMlfVlSG4XYPtPCOsey3WkXyCZml/vKYUOPLwDuO7SsjXgHA6dKejvLz2l6EKWI2dSb98md8sEPzlhG3d5CvMHZ3Subf4cnyB7XvromvsHyE4AMPzelOFQ1zeXuYZRCZTsPupu16DuSDqY0iSw9YEp6C6UJqrbbAVyKhl078lobbcfrNn/fOLb9thZi0pwAPImyj3yrpdG+ne4rtru+2X+qpOdRmu5e3Sy+GNjDUz7948BUN8u0SdJ3bD9xtmXTRNIVwLsp0/i1/h/f3FD9GKUU8/mUZLQ9cB7wcleuginpOpbN5rNX85jm+Qtt37dyvHF9oe9BmVz6Prar99iR9GHKIJ9B7ZO9gB/bfuXMv7VacbreV54/ssg0s0/ZvqXt+EPbcXdgV9udXZW1Zd4nd61Y6c/AL91yqVNJ5wOvsv3t5vkTgA/X7KkwZij5YIf+tu2f1IozFG+B7evHLN+KctPzPbVjNut/IKUAlICL3dKcnJL2Xdnrto9uI24T+56UQTj7UcpFH9bGoB9JF1N6ebh5vhblxm61yWqa9Xa6r0j6rzGLNwEeQelZ1caUmoPYawPPpJRY+HPK1dAL2orXlWloljlszLJNVAoK7eP2aobsBxylUjEOymjVqgNFGD+UfBHwz5L+n+3jxry+2oa/rJI2pdSz3odStbF6WVU19VBs/1jS/YZ7Xcx1dOeqGE7ekjYsi9oddNbcE3o9pdnuaODRrjwR94hLKZNYDAaAbUVpo66q633F9svGLW8Gwh1PKTxXlaSnAC+izO37PUoZgm1s125+nYh5f+Y+E0mLgffZfkrLce5F+ZxuajPOSMxNgK/VHv3YnF3uTtmhH0z5ku5le8uacYbiLe0NNKZnUCujOyX9HeWm2KAWy
63AobY/3EKs9wDPB46kTPDQegVKlclkHkNJRjSP/5fmflCtcRhd7yuzbEv1fUXSNZTRtx8BTrJ9i6Sf2G592sKuTMOZ+1i2lzRnZ1Vphup7g66v7mBCC9s3tNTX9jpKUngzpenHKqWN26IZHo97vubBpDcDT6CUb72iWfYA4N8lbWL77ZVDvoFS6/zNlKutpZtCS90TKb08utD1vjKWyqQdv29h1Z8Fnke5Z/FHlUJz03mmO4OpTe6S7ks7/xkTqb43rBm52sal/ZsohbU+AnxK0qdbiDGsswmkGy8BHumhQmG2r5D0QsqctFWTu+3OJ5i3/Q1YekU5XFum6vSPdLyvzDAifBNKv/cX145n+7Uqddx3ojQ3vQe4V7OvnNLFVVjb5n2zjMYXDtuEcob2Wttf6H6r6pihT/0mwLWUIdeXtBT3AZQdem9gW+AQysQWK0yKvIZxBlUhhytC0jx/gO0NZvrd1Yx3qe2HzPDaD20/tGa8SZC0P2Vi7N9SBhW1Ov1jh/vKn40sMvAr4DLbbXR5Ho2/LmVikn0oVSE3bTtm26YhuY/2gBj8p5/dRm+EobhHUw4eNzbPN6b0gKh2U1UrVk008Ku2bwKObMOfUtpVX1h7dOyYv285rl8V8uuUiTm+PrJ8Z0pZh66LYVUn6TLg8a5fSmEusdvcVx5nu42xD6tM0lNsf3PS27GmpiG5L7T909nfWT3uCkPLuxhurlJq+HmUodF/0VKMjShnYQA/6vJmcZskbUepmvhtlg1+ewylF8Ruti+e4OZVIenLwPO76tHR1b4ycvP9f20/vo04Q/E6nd93Eqahzf0kyiAYJH3W9h4dxV1L0saDbm1ND5ZWPq+mW+ezKWdFu1Bu9hyx0l9a/ThHUg4eP6Fc0m8t6UTgb2tf/krajzIX5nua5z+j3MsQpaTxR2rGc5ns+OGUz3G7Js43gb9xSxN2TMBBwHdV5t9deqPR9mtqBul6X2H5G+x3r7zucT7Gsvl9PyCp1fl9J2Eakvvwf3or7YozOIzyJTqBcgb4QkpJ3mpU6rYPBk6cQSl1sONMfX4reDOwLrDVYNRf0+XtcEqlxrdUjve3lIPVwHW2t2hGAX6FcrOuKpfp/M6g9PYY1I/vS2IH+A/gdMpEGitUNayo631lrabpc62hx0u/+y3cMF7MBOb37dI0NMvM2Fe6g9gPoxQsEmXm9x9UXv+dlIkXXjoYkSrpihZvjl1EOXjcNrJ8Q+BMV65hLekc2zsMPV863Zyks20/pnK8Qf34HSjlDtaiR/XjASR91/YTOojT9b5yJctuEI+qfsO4q3EXkzQNZ+6PlHQz5T99/eYxtNuXGMrKfwD8oGkH313Seyq3g+9A6YXwNZVaHscBa1dc/6g7x7XV2r5VUhtH+XsPPxlK7GsB92khXqf14yfkjKbHzBdYvlmm9pltp/uK7UVzeZ+k7SrdO3mopMHIXgEPbJ63Mr/vJMz7M/dJmaEd/HNtdb2U9ERKE80elLPOE20fWTnG94GnMv7s6Azbj6wc78PADbbfPLL87cCmtv+2crxO68dPgqRxNYfaOLPtdF+Zq1pn2F335JqEJPcRY9rBPw18cK5nFhXirwU8g3L2+bJmWZWzlQlc+m5AaSZ5DGUQEZRmkiWUqpBVB4pIutz2g2Z4rRfJfRxJ67VwM/xKOtxX5qqLHmsj8VrvudOWJPcRXbeDz3Gbur7XUOvSd7C+B1B6rwD8wCNVISsevI5m2ZR6o/XjH2z7JWsaY75ompt2olxZ7urK5YxXYTuq7itziNf1d6HTg0lN09Dm3rWu28HnousZjY6h6X5ag0udlys6iPdqShe3y1VKNg/Xj9+vwvonTtJjKQl9d8po5ldSJpyYlKr7yjw0tWe/Se4jbJ9HSQZvHGoHX0/SqbTQDj7Xzeo4XtcHkyrxmt4we2r5+vFvbOtKoUuS3kHpjvtTykQdbwWWuMUa9XPU9b7SeimCvui88NE0sf0d26+ijGJ7P2WQA
7B0NGRfdX0wqRrP9o9tf8H2yaOJvXHMmGXz3f7A/1HGBvy37V8xP84qq2yDpK21bO4EJO0k6d8lvb7p3FCC2Y+rEW9VNq3jeNUkuc+B7TttnzYyuKjVBCHp/kNPc7ZS1zR+Ye8HvAN4LqXZ6RhK1+C+XH0fT1ORVdKjKJOP/5RyA76NWvxfmeNbp/Y+TV92jEloO0GcSZlxp5OzFUn3tz2YVLqLKnydxhsxH854V4ntPwKnAqc2IyqfQ5mv9WeSvm77RRPatFr/d+sP7Q8vBo6yfVjTe6yN2dYWzOVNti9qIXYnktxXX9sJouuzy04PJhOI1xtNOYUTgBOakgCjk0uvsaYf+I1uCoVJ2olSZ+Yq4EODrpcV/++G9/edKTV0aMoDtPFduLdWnJR7KdufayFmp5Lc56/cRK0ZbLJXCtVIuhtloNsi2v3+Hk/pkXPTUDPJu1jWTPLyyvFOl3Q88HNgY0r9HCRtDrRRG+jelKufsf34gST3u5LaCULjJyKBssNttKbrX0VTfRN1DvpypfB54CZKvZw2pp8b6LqZ5HWUKe82B55k+w/N8m0pXT5ru8oV52aYj5LcV03tBLFkNV9bLV0fTObZwWsab6KOs6XtXWZ/2xrrtJmkGXR2HJQbqpJeS+n6+RNKT7XaHiLpiba/M7xQ0pOBa2foZTVVktxXTdWdemV9lCW9t2asRqcHkwnEW5mpu4k6g+9K+lPbF7Ycp9NmEkkPpgwe3Icy09qnKSPo25o96yzgljHLf0s5mOzaUtzOJLmvmi4TxAuBf6i5wq4PJl3Hm2dXCm15EvDSpoDY72mvimHXzSQ/pJT92NX25QCSDmghzsBmti8YXWh7iaRFLcbtTJL7iHmUILpuRqh+MJlAvPl0pdCWZ3URZALNJHtQztzPUJlK8Dja/Q6sbLan9VuM25kk9xV1liBUpu4b+xI9773SRrwJNHN1blCKVtJmtDgdXdfNJLZPBE7UsjmEDwDuK+kjlLIfcx10NFdnS3qF7Y8OL1SZGvKcyrEmIlUhV4Gk99qudrbZXFqb7krwruxg8n3bW05zvFm25ae2F3YVry2SnkuZAvL+lKkEt6ZMJVi1HMZQddT9hppJOq2O2uw/ewJ72d658rrvC5xI6fU2SOaLgfWA3d2D6faS3FfBtCeICRxMOo03y7ZcbXurruK1pZlEY2fga7a3bwYX7WN7/8pxdqecuT8BGDST/KftbWrGmbTm8xtMGXix7dMnuT01JbmvgtoJQtJoqVQDv7R9da0YdyXz6UqhLZKW2F7cJPntm66J37O9Y0vxBs0k+1AOKkfTTjNJVJY29xEdt4MfNmbZJk0VvH1sVx0s0vXBZAIHr3OY+UphakeljrhRZZLqbwGflHQdcEdbwWz/BvhkE2vQTHIgkOQ+z+XMfcR8aEqQtBh4n+2nVF7vGWMWb0JpZ2zjYNJpvLuC5kz6t5SKrn9JGUb/yaYEcMRSSe7zVJfTibV1MOk63l2lmasp6rWt7a9Jugewtu1xA3LiLizNMiPmQ4Jo7uR3dtRtBm5s2IN4nTZzTYKkV1Am7tgEeCBlIpkjgKdNcrti/klyX1FnCWKGAVObUHoovLZWnDlsR6cHk7bizdQHu7lS+ADQyZVJy14J7EgZPo/ty5o+7xHLSXIf0XGCGB0UZcqAkdfbvq5iHKD7g8l8OXh1fWXSst/bvn1Qu6uZiSltq7GCJPc5ailBnGH7p5XXuTKdHkwmEG+srq9MWvYNSW+iTLH3DODvgS9MeJtiHsoN1TlqEsQptneouM6lN00lfdb2HrXWPUO8hV0eTCYQb6VXCranPgk29dT3A55J6dF1GmVwUb7IsZwk9xFdJghJ59nefvRxWyZwMOk63r4jiwZXCmd3eaUQMR+kWWZFXTYleIbHbRnuu9/F0P+u43XdzNUZSSuUpx3WQsnfmHJJ7ivqMkE8UtLNlCS4fvMYltXovlfleF0fTLqOdxLQ2ZVCx+6kfIaforSx/3aym
xPzXZplRnTdlNAlSX8EfkNzMAFuG7xECweTCcTrtJmra5IeSqnxsivwA0qi/4rt1soPxPTKmfuKum5K6Izttfscj+6vFDpl+4fAIcAhkvYCPgEcCrxnohsW81KS+4p6nSB6rutmrk5J2oJShnd34NeUCS1OnOhGxbyVZpkRXTclRMyFpG8A9wSOB04Abhh+3fYN434v7rqS3COmgKQrWXYlOfylHZx09KoJMdZckntEj0jazvbFk96OmLy1Jr0BEVHVMZPegJgfktwj+qX2bGExpZLcI/ol7awBJLlHRPRSkntEv/RlIvBYQ+ktEzEFmnlTb7R9U/N8J+B5wFXAh2wnqcdycuYeMR2OBzYAkPQo4DPAT4FHAh+e4HbFPJXyAxHTYX3b1zaPXwwcZfuwZvKOqZ/4O+rLmXvEdBju4rgz8HUA23eS7o8xRs7cI6bD6ZKOB34ObAycDiBpc+B3k9ywmJ+S3COmw+uAvYDNgSfZ/kOzfFvKNJARy0lvmYgp09xQfRHwQuAnwOdsf3CyWxXzTc7cI6aApAdTarnvQ5nT99OUk7OdJrphMW/lzD1iCki6E/gWsJ/ty5tlV6TUb8wkvWUipsMewC+AMyR9VNLTSC+ZWImcuUdMEUkbUEam7kPpEnk0cKLtr0x0w2LeSXKPmFKSNgH2BPayvfOktyfmlyT3iIgeSpt7REQPJblHRPRQkntERA8luUdE9FCSe0RED/1/qcxo78lXaY0AAAAASUVORK5CYII=\n"},"metadata":{"needs_background":"light"}}]},{"metadata":{},"cell_type":"markdown","source":"## understanding data with visualization\n#### Data can be visualised in many ways that is univariate plots and multivariate plots #### Used the Histogram for univariate plot as shown below and the correlation matrix plot as the multivariate plot as shown above"},{"metadata":{},"cell_type":"markdown","source":"## Histogram\n#### This helps to understand each attribute of my dataset independently"},{"metadata":{},"cell_type":"markdown","source":"## Data pre-processing"},{"metadata":{"trusted":true},"cell_type":"code","source":"plt.figure(figsize=(18,18))\ndata.hist()\nplt.subplots_adjust(bottom=3, right=2, top=5)\nplt.show()","execution_count":16,"outputs":[{"output_type":"display_data","data":{"text/plain":"
"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAA1wAAAJvCAYAAABvfNBzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xu4HFWd7//3xwS5BQwB2T8MaOCYQcEIAxHwHsULFzU4AwoyJFGYzIzgjYwSdUZRcX7BERFvKAoCigIiAgoqHGSfUY+gBJCIqESIEAiESwgEUAh8zx9rNal0uvfuvburu3bvz+t56tndq1ZVfbt79+paq9ZapYjAzMzMzMzMOu8ZvQ7AzMzMzMysX7nCZWZmZmZmVhJXuMzMzMzMzEriCpeZmZmZmVlJXOEyMzMzMzMriStcZmZmZmZmJXGFy8zMzMyswiQNSjqqybrnSlojaUK347LWuMJlZmZmZqXLlYZVkjYupG0v6fuS7pO0WtISSfOG2c80SSHpurr0bSQ9LmlZOa+gYSyzciwf6tYx60XE7RExKSKezDEN5ph2K+aTdFFOn9WTQJuQdLykb/c6jjK5wlVxJRROa/Jyj6QfSXr9CI77z5JurkvbWtJKSfsVCp0L6/a1W04fzM+fW4ijtoSkBYVt3iPpNkkPSbpW0isK6yTpREn35+UzkpTX/Z2kiyXdK+kBST+VtHNdPB+QdHd+786oez2fyu/nWknHD/Wemo1nJZZNayT9Nq9r+COc8z+/EMdRhXWzclyH5uezJd2Qy5L7JF0paVoh/6jLA0nvkPQXSY/kE5kphXXH5LLrb5LOrNvumZIukLSs0cmPpA9K+p2kh3M5+MGh3kOzsSB/714JBPCWwqpvAXcAzwO2BuYA97S4280lvajw/B3Abe3GOkJzgQfy3yr5E+m9BNL5GrAPcG/PIhrHXOGqsJIKp8kRMQnYDbgC+EH9CVGz40bE14HlwMcK2T8PXBYRP8nP7wVelr/YNXNJX/zafmotMZNyLDOAp4Dv5+PvDSwCDgaeBZye46xdKp8PHJRfw4uBNwH/Unt9wCXAzsAA8Gvg4sJreyOwENgXmAbsBHyiEOtS4EPApRu+dWYG5ZZNedlt+OwN43oDcBHwrog4N1fKzgYWkMqSHYGvkMqbtsoDSbsCXwOOIJU1j+Z919wFnACc0STcXwD/BNzd6KWQ3rutgP2AY2oVSLMxbA5wNXAm61dOXgKcGRGPRMTaiLg+In7c4j6/VbevOaTv/NMkPSc3BN2bGzDeW1i3l6RfSXpQ0gpJX5L0zML6kPSvkm7JDTlfrjXw5vWbkc5VjgamS5pZWFdrTHqnpDvy9v8q6SWSbszH/FIh/zxJv5T0xdwA9AdJ+9a93uflPA9LulzSNnXHmljIew7w9sK502HAD4DHC8fcWNLnJd2Vl8/XGp1y49VySR9SalhfIekgSQdI+pNSo/ZHCvt6hqSFkv6s1Bh+fq0RqhDfXEm358avj+Z1+wEfybE+3eDWdyLCS0UXUsXml8DngB8V0tcAu49wX9NIJ0cT69L/nXRC9IzhjlvYzypgd+ANpJOKrfK6WaQK2VeBo3PaBNZV0gabxPZx4KrC87cDvy483zzHvl1+/n+B+YX1RwJXN9n3lLzt1vn5d4D/KqzfF7i7wXbfBo7v9f+AFy9VXLpRNuV1xwPfbpAewPPz40HgKFLDy4PAAYV8BwM3DHHsUZcHwH8B3yk8/1+kE5kt6vKdQDqZbBbDcmDWMO/RF4Av9vpz9+KlnYXUgPFuYE/gCWAgp//vXJ4cCjy3xX3Vyo1ppEaeCcALgT8CrwOW5XzPABbnMuuZpEaVW4E35vV7kq76TMz7uhl4f+E4AfyI1Jj7XFKj8n6F9UcAK/Lxfwh8oUGMXwU2IZ0z/ZXUKLQtMBVYCbw6558HrAU+AGxEOhdaDUzJ6weBPwN/B2yany+qO9bEQt6jg
MuB/XPar4GXFssc4JOkSvC2wLNJ51efyutm5Xg+luP55/z6vwNsAeyaX89OOf/78762BzYmNUh9ty6+r+fYdwP+Brwwrz+eBmV9Py2+wlVtc0gtFOcAb5Q0kNOvBr4s6VBJz23zGBeSvmjFbnfNjktELCN9+c4gfZneHRGr6vZ5NusuY78RuIlUMWtmDnBW4fmPgQmS9s4tM+8CbmBdS/CuQLEF5Lc5rZFXkU6g7h9i24G6K3JmNrRulE0j8WZSpejgiLiskH4d8AJJJ0t6jaRJddu1Ux6st21E/JlU4fq70byAZnJr+itJ5ajZmKQ0LOB5wPkRsZhUcXhHXn0I8HPgP4HblLoAv6TFXS9nXSVrLnVXt0hXz54dEZ+MiMcj4lbSSf+hABGxOCKujnRlbRnpvObVdftYFBEPRsTtwFWkBueaucB5kcZOfQc4TNJGddt/KiL+GhGXA4+QKiErI+LO/Lr/vpB3JfD5iHgiIs7Lr+3AwvpvRsSfIuIx4Py6WBo5G5ijNLRickT8qm794cAnczz3kq7wH1FY/wTw6Yh4AjgX2AY4JSIejoibSOXSi3PefwE+GhHLI+JvpErUwXVX3T4REY9FxG9J5eeoejOMRa5wVVSJhVO9WkWodtl3qOPWfIn0JbwhIi6q32FE/F9gSv6Cb3B5v0jSK0ndcS4oJD9M6l74C1ILyMdJV7Qir59EavWpWQ1MKl7mz/veHvgycGwhudG2kFprzGwYJZZN9+UuNg9K+vcRhvUaUrflXxYT88nVLFJL8vn5GGcWKl7tlAf129a273RZcjzpt/qbHd6vWTfNBS6PiPvy8+/kNCJiVUQsjIhdSecDNwAX1f+mD+Fs0tWhw0gNL0XPA55TKFseJHVfG4Cnx33/SGkc50OkK9fb1O2j2O33UdJ3H0k7kMqec/K6i0lXsg5kfcVu1Y81eF5sCLqzcK4D8BfgOcPFMoQLgdcC7yF1v6z3nHyMZse7P1cma7EyRPzPIw3/qL3PNwNPkt/rUcbfN1zhqq4yC6eiqfnvA8MdtyYXBjczdIvrt4BjSIXRD4bINxf4fkSsKaQdRbqqtSupC8A/AT+SVCsE1gBbFvJvCawpFlKSnk26lP6ViPhuIW+jbSFV8sxseGWVTdtExOS8fDanrSV1ZXlaofX4iULyf5IaZy5SYdKLHNPVEfG2iHg26UrRq4CP5tXtlAf129a271hZIukYUqPVgbnF2GzMkbQp8Dbg1bliczep29xuqptFL5crnyWd9E/ZYGeNfZ9Uybk1Iv5St+4O4LZC2TI5IraIiAPy+lOBPwDTI2JLUmWs1XOpI0jn0T/Mr+lWUoVrzpBbDW1qXXn5XIbuITSkiHiU1Gvo32hc4bqLVFHqxPHuIHVfLL7Xm+QrecOGOspjjhmucFVQFwqnoreSLmH/cSTHbcG3SH21L8tf+A3k4x3C+t0JIV1i/mG+bP5UpAk5VgAvy+tvYv3L0LtRqPxJ2opU2bokIj5dt+9G295T6HJoZk10uWwCuJ3U979oR1KrafFH/BHgANLEGBc06NJTi+k3pBbf2qxm7ZQH620raSfSuIU/Nd1iBCS9izyhR0Qs78Q+zXrkINJ3dhdSF7jdSeOtfk7q7naipBdJmihpC1LlYGmrv8sR8QjpKk6je1T9GnhI0nGSNpU0IR+rduV9C+AhYI2kF+Rjt2oOqQve7oXlH4ED2ximsC3wXkkbSTqE9D5dNsw2w/kIaZzYsgbrvgv8h6RnK03A8TE2vErYqq8Cn5b0PEgN35Jmt7jtPcA0SX1bL+nbFzbGlVo4AUgayK2nHwc+HBFPDXfckbyAiLiN1A/6o0NkeytpkPtVdem/IRVYOyl5PWlcxO/y+rOBYyVNzVe9FpBmPULSlsBPgV9GxMIGxzwbOFLSLrli9h+1bfP2G0nahPTdmChpE/lGgmY1pZdNdX4C7CzpiPzdnELq8nNBRKwtZoyIh0kz+j0H+E4+sXqF0u0stgXIJ1RvI
Y01g/bKg3OAN0t6paTNSYPPL8xxkN+DTUiD6SfkbScW9r1xXg/wzLy+dnuLw/PrfH3uFmk2ls0ljT26PSLuri2k4QmHk64M/4B0PnAr6YrLW5rurYGIuDaPo6xPf5I0xnN30nTx9wHfIDXOQJo47B2kK9NfB85r5XiS9iE1Bn25+Joi4hLS5CCHjST+gmuA6TnOT5PGpbbVIBwRd0XEL5qsPgG4FrgRWEIa93rCKA91CmmW6MslPUwqZ/ducdvv5b/3q+7ean0jujxLh5fhF9JJxkkN0t9G6v96KnALqUvLvaQZdF44zD6nkS7ZriG1Bq8ktZrsN4LjTiyknQmcUJdvFrC8yfGPom6WQlLF6FMN8op08nI7qRC8GTiibv1nSN0gH8iPldfNza/zkfxaa8tzC9sfS2pNeYg0LmLjutcVdcu8Xv9PePFShaXksmmDWQrz+peRxnOuInV1OZ08M2pePwgcVXg+hTQY+xzSYO4f5u/7GmAZcCKwUSH/qMsD0ona7bm8uZg8m1hed3yDbY8vrF/WYP20vO42UpfJYhn21V5//l68eClvIY1D+0Wv4/BSzlI7STUzMzMzsx5QuifqURHxil7HYp3nLoVmZmZmVimSDle6EW794lsk2JjT1hUuSR8gdRULUt/PdwLbkebqn0LqC3pERDyeZ446m3STufuBt0fjAXw2Srnf/9carPpLpFnDzMy6zmWTmZmNZ6OucEmaSupXv0tEPCbpfNKYoANIA4fPlfRV4LcRcaqkdwMvjoh/lXQo8NaIeHuHXoeZmZmZmVnltNulcCKwaZ55aTPS1N2vZd1NbM8izWoFMJt1039fAOw7yvtGmZmZmZmZjQkTh8/SWETcKemzpBmaHiPd92gx8GCsm653OeturDuVdFM0ImKtpNXA1qSpL58maT4wH2DTTTfdc+rUqTzjGb0favbUU085DsdR6TjKiOFPf/rTfZFuGGvANttsE9OmTevqMR955BE233zzrh5zJBzf6FU5NqhmfIsXL3aZlI2kPKriZ1lT5djA8bWrn+MbUXk02ukNga2AnwHPBjYCLiLddXtpIc8OwJL8+CZg+8K6PwNbD3WMPffcM6666qqoAsexPsexvirEUUYMwLVRgelUq7Lsueeebb6jI1eF/62hOL7Rq3JsEdWMz2XS6MqjKn6WNVWOLcLxtauf4xtJedROc/jrgNsi4t6IeAK4kHS/lMmFmztuT7pvCqSrXTtAuiEk6aZzD7RxfDMzMzMzs0prp8J1O7CPpM3yWKx9gd8DVwEH5zxzSTeDhHT36bn58cHAz3Lt0MzMzMzMrC+NusIVEdeQJr+4jjQl/DOA04DjgGMlLSWN0To9b3I6sHVOPxZY2EbcZmZmZmZmlTfqSTMAIuLjwMfrkm8F9mqQ96/AIe0cz8aeaQsv7ej+li06sKP7M7Pq6kT5sWDGWubl/bj8sPFqyZ2rn/4edIK/S2Yj0/vp3czMzMzMzPqUK1xmZmZmZmYlaatLofWXkXTfKXbTMTMzMzOzxnyFy8zMzMzMrCSucJmZmZmZmZXEFS4zMzMzM7OSuMJlZn1D0gck3STpd5K+K2kTSTtKukbSLZLOk/TMnHfj/HxpXj+tt9GbmZlZP3KFy8z6gqSpwHuBmRHxImACcChwInByREwHVgFH5k2OBFZFxPOBk3M+MzMzs45yhcvM+slEYFNJE4HNgBXAa4EL8vqzgIPy49n5OXn9vpLUxVjNzMxsHPC08GbWFyLiTkmfBW4HHgMuBxYDD0bE2pxtOTA1P54K3JG3XStpNbA1cF9xv5LmA/MBBgYGGBwcLPmVrG/NmjVdP+ZIlBnfghlrh880jIFN1+2nau/jeP5szczGE1e4zKwvSNqKdNVqR+BB4HvA/g2yRm2TIdatS4g4DTgNYObMmTFr1qxOhNuywcFBun3MkSgzvk7c62/BjLWctCT91C07fFbb++uk8fzZmpmNJ+5SaGb94nXAbRFxb0Q8AVwIv
AyYnLsYAmwP3JUfLwd2AMjrnwU80N2QzczMrN+5wmVm/eJ2YB9Jm+WxWPsCvweuAg7OeeYCF+fHl+Tn5PU/i4gNrnCZmZmZtcMVLjPrCxFxDWnyi+uAJaTy7TTgOOBYSUtJY7ROz5ucDmyd048FFnY9aDPrW5ImS7pA0h8k3SzppZKmSLoi36biitwVGiVfyLepuFHSHr2O38w6x2O4zKxvRMTHgY/XJd8K7NUg71+BQ7oRl5mNS6cAP4mIg/P9/zYDPgJcGRGLJC0kNfQcRxpvOj0vewOn5r9m1gd8hcvMzMysgyRtCbyKfEU9Ih6PiAdZ/3YU9bepODuSq0ljT7frcthmVhJXuMzMzMw6ayfgXuCbkq6X9A1JmwMDEbECIP/dNud/+jYVWfEWFmY2xrlLoZmZmVlnTQT2AN4TEddIOoWhx4m2dJuK0d4XsHg/uk7o5P3Zqn6/N8fXHseXuMJlZmbjwrQO3Ner3rJFB3Z8n9YXlgPL82Q+kCb0WQjcI2m7iFiRuwyuLOTfobB98RYWTxvtfQG/eM7FT9+PrhM6eU+7qt/vzfG1x/El7lJoZmZm1kERcTdwh6Sdc1LtNhXF21HU36ZiTp6tcB9gda3roZmNfW01d0iaDHwDeBHp0ve7gD8C5wHTgGXA2yJiVb4vzinAAcCjwLyIuK6d45uZmZlV1HuAc/IMhbcC7yQ1dJ8v6UjSvQNrM6VeRjo/Wko6R3pn98M1s7K0e33ZU56amZmZ1YmIG4CZDVbt2yBvAEeXHpSZ9cSouxR6ylMzMzMzM7OhtXOFqzjl6W7AYuB91E15Kmm4KU/X66NcPwNPVWY3GQ9xjGQGo07PeNSq+tc+Hj6XsRSDmZmZma2vnQpXKVOe1s/AM2nSpErMblKVWVbKjGPeCGbwWjBjbUdnPGpV/cxI4+FzGUsxmJmZmdn62pmlsNGUp3uQpzwFGM2Up2ZmZmZmZv1i1BUuT3lqZmZmZmY2tHb7hHnKUzMzMzMzsybaqnB5ylMzMzMzM7Pm2hnDZWZmZmZmZkNwhcvMzMzMzKwkrnCZWd+QNFnSBZL+IOlmSS+VNEXSFZJuyX+3ynkl6QuSlkq6UdIevY7fzMzM+k/3b6RkZlaeU4CfRMTBeTKfzYCPAFdGxCJJC0n3CzwO2B+Ynpe9gVPzXzMzG8K0Edy3czgLZqxl3sJLWbbowI7t06xqfIXLzPqCpC2BVwGnA0TE4xHxIDAbOCtnOws4KD+eDZwdydXA5No9BM3MzMw6xRUuM+sXOwH3At+UdL2kb0jaHBio3fMv/902558K3FHYfnlOMzMzM+sYdyk0s34xEdgDeE9EXCPpFFL3wWbUIC02yCTNB+YDDAwMMDg42IFQW7dmzZquH3MkivEtuXN1R/e9YEb7+xjYNHVZKks7n81Y+mzNzGz0XOGyMaW+33it73c73G+8bywHlkfENfn5BaQK1z2StouIFbnL4MpC/h0K228P3FW/04g4DTgNYObMmTFr1qySwm9scHCQbh9zJIrxtftdLMOCGWs5aUl5P3XLDp816m3H0mdrZmaj5y6FZtYXIuJu4A5JO+ekfYHfA5cAc3PaXODi/PgSYE6erXAfYHWt66GZmZlZp/gKl5n1k/cA5+QZCm8F3klqWDpf0pHA7cAhOe9lwAHAUuDRnNfMzMyso1zhMrO+ERE3ADMbrNq3Qd4Aji49KDMzMxvX3KXQzMzMzMysJK5wmZmZmZmZlcQVLjMzMzMzs5K4wmVmZmZmZlYST5phZmZmZtZl9fcWHY3i/Uh9X9HqcoVrDOvEF9XMzMzKIWkCcC1wZ0S8SdKOwLnAFOA64IiIeFzSxsDZwJ7A/cDbI2JZj8I2sw5zl0IzMzOzcrwPuLnw/ETg5IiYDqwCjszpRwKrIuL5wMk5n5n1CVe4zMzMzDpM0vbAgcA38nMBrwUuyFnOAg7Kj2fn5+T1++b8ZtYH3KXQzMzMrPM+D3wI2CI/3xp4MCLW5ufLgan58
VTgDoCIWCtpdc5/X3GHkuYD8wEGBgYYHBxsKZCBTdNYnyqqxdbqa+m2NWvWlBZbJz6T4mdbxfewzPevE7oVX9sVLvdPNjMzM1tH0puAlRGxWNKsWnKDrNHCunUJEacBpwHMnDkzZs2aVZ+loS+eczEnLalmG/uCGWs5aclElh0+q9ehNDQ4OEir7/NIzevQpBm1z7aK72GZ718ndCu+Tnz7av2Tt8zPa/2Tz5X0VVK/5FMp9E+WdGjO9/YOHN/MzMysSl4OvEXSAcAmpHOkzwOTJU3MV7m2B+7K+ZcDOwDLJU0EngU80P2we6fTE4F5xj6rkrbGcLl/spmZmdn6IuLDEbF9REwDDgV+FhGHA1cBB+dsc4GL8+NL8nPy+p9FxAZXuMxsbGp30oxa/+Sn8vOW+ycDtf7JZmZmZuPBccCxkpaSzoFOz+mnA1vn9GOBhT2Kz8xKMOouhWX1T64fEFqVwXZVjKOXA2CrMgC3E3F04nOtwv9HFWIwM7P1RcQgMJgf3wrs1SDPX4FDuhpYn+tUF8XajYXdRdHa0c4YrlL6J9cPCJ00aVIlBttVZdBfMY5ODLYcreIgzV7qRBydGGRahf+PKsRgZmZmZusbdZdC9082MzMzMzMbWhk3Pnb/ZDMzMzMzMzp042P3TzazqvC9Ac3MzKxKyrjCZWbWS7V7A9bU7g04HVhFuicgFO4NCJyc85mZmZl1lCtcZtY3fG9AMzMzq5reTzNnZtY5tXsDbpGft3xvQEm1ewPeV9xh/a0quj31ftWn+6/KrSqaKfsWFu18NmPpszUzs9FzhcvM+kJZ9wasv1VFt6fer/p0/1W5VUUzZd/Cop3bSoylz9bMzEbPFS4z6xel3BvQzMzMrB0ew2VmfcH3BjQzM7Mq8hUuM+t3xwHnSjoBuJ717w34rXxvwAdIlTSzEZnWRjfKBTPWbtANc9miA9sNyczMKsYVLjPrO743oJmZmVWFK1xmZmZmZmNcO1fcm/FV987wGC4zMzMzM7OSuMJlZmZmZmZWEle4zMzMzMzMSuIKl5mZmZmZWUk8aYaZmZmZ2RDKmJDCxg9f4TIzMzMzMyuJK1xmZmZmZmYlcYXLzMzMzMysJK5wmZmZmZmZlcSTZti414mBsAtmrGVe3o/vym5mNr5J2gE4G/j/gKeA0yLiFElTgPOAacAy4G0RsUqSgFOAA4BHgXkRcV0vYjezznOFy8xsHOrUjFvFxgYze9paYEFEXCdpC2CxpCuAecCVEbFI0kJgIXAcsD8wPS97A6fmv2bWB0bdpVDSDpKuknSzpJskvS+nT5F0haRb8t+tcrokfUHSUkk3StqjUy/CzMzMrCoiYkXtClVEPAzcDEwFZgNn5WxnAQflx7OBsyO5Gpgsabsuh21mJWlnDFet9eaFwD7A0ZJ2IbXWXBkR04Er83NYv/VmPqn1xszMzKxvSZoG/D1wDTAQESsgVcqAbXO2qcAdhc2W5zQz6wOj7lKYC4paofGwpGLrzayc7SxgkHS5/OnWG+BqSZMlbVcreMzMzMz6iaRJwPeB90fEQ2moVuOsDdKiwf7mkxqtGRgYYHBwsKU4BjZN3X+rqMqxgeNr9X+smTVr1rS9jzJ1K76OjOEaqvVG0nCtN65wmZmZWV+RtBGpsnVORFyYk++pNTbnLoMrc/pyYIfC5tsDd9XvMyJOA04DmDlzZsyaNaulWL54zsWctKSaw/YXzFhb2djA8S07fFZb2w8ODtLq/2kvdCu+tj+hsltvqlIzrmIcvWxxqUqLTxXj6NX/SVX+R3vFs4KZWVXk8uV04OaI+Fxh1SXAXGBR/ntxIf0YSeeSJstY7R5AZv2jrQpXN1pvJk2aVImacVVq6MU4ejkzWFVafKoYR7utQaNVlf/RHvKsYGZWFS8HjgCWSLohp32EVNE6X9KRwO3AIXndZaTGn6WkBqB3djdcMyvTqM9U3XpjZlXicaVmVhUR8Qsa9+wB2LdB/gCOLjUoM+uZdi4Nu
PVmhDp9g10za8zjSs3MzKwq2pml0K03ZlY5VZkVrFPKGpvXqbGPVRlH2UyV42sUW5XGYY73caFmZp3S+8EvZmYdUqVZwTqlrLF5nbpSXpVxlM1UOb5GsfVqDGgjHhdqZtYZ7dz42MysMloYVwobjiudo2QfPK7UzMzMSlDNZj8zs5HzuFIzMzOrHFe4zKwveFypmZmZVZG7FJqZmZmZmZXEFS4zMzMzM7OSuMJlZmZmZmZWEle4zMzMzMzMSuIKl5mZmZmZWUlc4TIzMzMzMyuJK1xmZmZmZmYlcYXLzMzMzMysJL7xsZmZWUVMW3hpx/e5bNGBHd+nmZm1zle4zMzMzMzMSuIKl5mZmZmZWUncpdCsw9wlyMpQxv+VmZmZlc9XuMzMzMzMzEriK1xmZmZmZraBdntXLJixlnmFfYzXHju+wmVmZmZmZlYSX+Fqor5GX19DNzMzMzMzG46vcJmZmZmZmZWk61e4JO0HnAJMAL4REYs6sV/P4GVmI1VWeWRmNhouk6zfjdeZnLta4ZI0Afgy8HpgOfAbSZdExO+7GYfZWNNKATWSbq9joXAqW5nlUSd/UNyd2Wx88DmS2ei085vb6De2jHOkbl/h2gtYGhG3Akg6F5gNuDAxs25zeWTjwmhPRppV9t1gUxqXSWZ9ShHRvYNJBwP7RcRR+fkRwN4RcUwhz3xgfn66M3A/cF/XgmxuGxxHkeNYXxXiKCOG50XEszu8z0popTzK6fVl0h+7Gmg1/reG4vhGr8qxQTXjG9dlUhvlURU/y5oqxwaOr139HF/L5VG3r3CpQdp6Nb6IOA047ekNpGsjYmbZgQ3HcTiOqsdRhRjGmGHLI9iwTOq2qn+ujm/0qhwbVD++PjTic6SWd1zhz7LKsYHja5fjS7o9S+FyYIfC8+2Bu7ocg5kZuDwys2pxmWTWp7pd4foNMF3SjpKeCRwKXNLlGMzMwOWRmVWLyySzPtXVLoURsVbSMcBPSVOenhERNw2zWc+68tRxHOtzHOurQhxViGHMGGV51AtV/1wd3+hVOTaofnx9peQyqcqfZZVjA8fXLsdHlyfNMDMzMzMzG0+63aXQzMzMzMxs3HCFy8zMzMzMrCSVq3BJmiDpekk/ys93lHTHj1J/AAAgAElEQVSNpFsknZcHknYjjmWSlki6QdK1OW2KpCtyLFdI2qrkGCZLukDSHyTdLOmlPYhh5/we1JaHJL2/23HkWD4g6SZJv5P0XUmb9OL/Q9L7cgw3SXp/Tiv9/ZB0hqSVkn5XSGt4XCVfkLRU0o2S9uh0PFa++vKwShqVkVXSqPzsdUw1zcrVXsdV1Ki87XVMNnKS9pP0x/xbsLAC8bT8O9aj+HaQdFUuM26S9L4qxZjPe34t6bc5vk/k9J6cKzeJsRLn8UPE15Pz+8pVuID3ATcXnp8InBwR04FVwJFdjOU1EbF7YX7+hcCVOZYr8/MynQL8JCJeAOxGel+6GkNE/DG/B7sDewKPAj/odhySpgLvBWZGxItIA4oPpcv/H5JeBPwzsBfpM3mTpOl05/04E9ivLq3ZcfcHpudlPnBqCfFY+erLw6qpLyOrpFH5WQlDlKuVMER5a2OIpAnAl0m/B7sAh0napbdRjeh3rBfWAgsi4oXAPsDR+T2rSox/A14bEbsBuwP7SdqH3p4r16vSeXwzXT+/r1SFS9L2wIHAN/JzAa8FLshZzgIO6k10AMzOMZQei6QtgVcBpwNExOMR8WA3Y2hgX+DPEfGXHsUxEdhU0kRgM2AF3f//eCFwdUQ8GhFrgf8DvJUuvB8R8T/AA3XJzY47Gzg7kquByZK263RMVp768tBaN0T5WUXFcrVK6stb3w9q7NkLWBoRt0bE48C5pN+Gnhnh71jXRcSKiLguP36YVHGYSkVizL/pa/LTjfISVORceQycxzdT+udbqQoX8HngQ8BT+fnWwIP5xBbSTQGndimWAC6XtFjS/Jw2EBErIH0pgW1LPP5OwL3AN/Ol2
W9I2rzLMdQ7FPhuftzVOCLiTuCzwO2kitZqYDHd///4HfAqSVtL2gw4gHSjyl59Ls2OOxW4o5Cvm98d64z68rBqGpWRVdGs/KyiYrlaCY3K24i4vLdR2SiMld+BXp7XNCVpGvD3wDVUKMbcZe8GYCVwBfBneneuXK9K5/HN9OT8vjIVLklvAlZGxOJicoOs3ZrH/uURsQfpUvzRkl7VpePWTAT2AE6NiL8HHqGHl9lzn9u3AN/r0fG3IrVA7Ag8B9ic9NnUK/X/IyJuJl0evwL4CfBbUheEqunld8fa1KQ8rJpel5FDqVT52Uyvy9VmGpW3kv6pt1HZKPh3YJQkTQK+D7w/Ih7qdTxFEfFk7o68Pekq5gsbZetuVJU8j2+mJ79dlalwAS8H3iJpGemy92tJNeXJuUsDpH+urnRriIi78t+VpL71ewH31Lpl5b8rSwxhObA8Iq7Jzy8gnUB0M4ai/YHrIuKe/LzbcbwOuC0i7o2IJ4ALgZfRg/+PiDg9IvaIiFeRukbcQu8+l2bHXU668lbTte+OdcQG5aGkb/c2pPU1KSOroln5WTX15WpVNCtvbWwZK78Dvfr9bEjSRqTK1jkRcWFOrlSMALmb9CBprFlPzpXrVOo8vplend9XpsIVER+OiO0jYhqpi8XPIuJw4Crg4JxtLnBx2bFI2lzSFrXHwBtIXckuyTGUHktE3A3cIWnnnLQv8PtuxlDnMNbv9tLtOG4H9pG0We4TXHs/evH/sW3++1zgH0jvS68+l2bHvQSYo2QfUpegFV2KydrUpDyszBWGIcrIShii/Kya+nK1KhqVt5WZdMRa9htgep4l7pmksuSSHsfUSK9+PzeQ/99PB26OiM8VVlUiRknPljQ5P96U1DhyMz04F6pXpfP4Znp6fh8RlVuAWcCP8uOdgF8DS0ndLjbuwvF3InUV+y1wE/DRnL41afaSW/LfKSXHsTtwLXAjcBGwVbdjyHFsBtwPPKuQ1os4PgH8gfTl+BawcY/+P35OOnn7LbBvt94P0onZCuAJUsvlkc2OS7qM/2VS3+4lpNnGSn1fvJT2//Z0eViVpVkZWaWlUfnZ65jq4tugXK3S0qi87XVMXkb1OR4A/Cn/FvT8ezqS37EexfcKUpe3G4Eb8nJAVWIEXgxcn+P7HfCxnN71c6Fh4nz6d6tKsTX77erG56t8IDMzMzMzM+uwynQpNDMzMzMz6zeucJmZmZmZmZXEFS4zMzMzM7OSuMJlZmZmZmZWEle4rClJ75B0raQ1klZI+rGkV0g6frh7AkkalLRK0sZ16dtL+r6k+yStlrRE0rzC+iMl/UHSw5LukXRpbQpPMzMzM7OxxhUua0jSsaQb1v0XMAA8F/gKMLuFbacBryRNrfqWutXfAu4AnkeahnMOcE/e7tX5eIdFxBaku6ef3/aLMbOOadIQ87/z8zWSHpf0ROH5j4fY1zRJUci7TNLCJnkbNuLkdXtJukzSg5IekPRrSe8srP+IpNvyMZZLOq+wboqk83Ij0H2SzpG0ZV63raTvSrorNxD9UtLeDd6Pv0h6RNJFkqYU1h2T36u/STqzQdz75gamRyVdJel5DfJMkXSvpF80ex/N+lUuEx4rlBFr8ndueYO8g5KOyo+bNgznfb5uFLHsKOkpSV+pS6+VYxPr0s+UdEJ+PE/Skzn+hyT9VtKbGuyjaVko6YWSfpbLoqWS3lq3/m2Sbs4N1r+XdFDd+g9Iujtvf0axLJX0qdwAvlbS8Q1e+1Dl3Jq65UlJXxzRmzsOuMJlG5D0LOCTwNERcWFEPBIRT0TEDyPigy3sYg5wNXAm624kV/MS4My8z7URcX1E/Liw7lcRcT1ARDwQEWdFxMMdeWFm1hY1b4i5PiImRcSkvO682vOI2L+FXU/O2x4M/Kek19cddxpNGnEkvRT4GfB/gOeTGnL+Ddg/r58LHAG8Lh9jJuk+KzUnkO5xuBPwv/LrOj6vm0S6eeyewBTgLOBSSZPyvncFv
pb3PwA8mt+Pmrvy/s+of8GStgEuBP4z7/ta4Lz6fMCJ+KbDNr69uVCeTCJ9r3phDrAKOLRRw08LfpXjn0wqJ85VvolxQcOyMFfmLgZ+RCov5gPflvR3ef1U4NvAscCWwAeB70jaNq9/I7CQdBPzaaTy7hOF4y4FPgRcWh/0cOVc3WczADxGut+WFbjCZY28FNgE+MEot58DnJOXN0oaKKy7GviypEMlPbduu2ty/k9IevkoCzQzK0EHGmKGFRHXkm5GuXvdqqEacf4bOCsiToyI+yJZHBFvy+tfAvw0Iv6cj3F3RJxW2H5H4KKIeCgiVpPKvV1z3lsj4nMRsSIinszbPRPYOW97OPDDiPifiFhDqjz9g3I36Pw+XUS6wXG9fwBuiojvRcRfSZW83SS9oJYhVyZfBHxz2DfPzMo2B/gP0k2b3zzanUTEU6TePpsD05vkqS8LXwA8Bzg5l0U/A35JqgQBbA88GBE/zmXgpcAjpEYkSOXm6RFxU0SsAj4FzCsc76zc+N2ogXvIcq7OwcBK4OctvBXjiitc1sjWwH0RsXakG0p6Bam74PkRsZh0d/t3FLIcQvoi/idwm6QbJL0EICJ+TjoJ2YPUynK/pM9JmtDWqzGzTmi3IWZYkvYhVTCW1q1q2IgjabMc1wVD7PZqYI6kD0qa2aA8+TLwJklbSdoK+EegYTdISbuTKly1+HYFfltbnyt1jwN/N9xrbbDtI6Tyctd8rAk5tmNIV/bMrEckvZJUqTmXNNRhThv7mgC8k1Rx+0uTPPVloRply3kgXSG/WdJbJE3I3Qn/BtyY169X3uTHA5K2biHkkZRzc4GzI8JlVh1XuKyR+4Ft6vsjt2gucHlE3Jeff4dCi3RErIqIhRGxK+nS8w3ARZKU1/84It5MumQ+m9QCc9SoX4mZdcqoG2JacJ+kx4BfkbqqXFRbMUwjzlak37EVzXYcEd8G3gO8kdTtcGXd2IjrSJWo+/PyJOt3C6zFsSWpVfoT+UoYpC6Hq+uyrgZamehnuG3fC1yTX7PZeHaR0vjMByVdNHz2UswFfpyvDn0H2L/WXW8E9pH0IPBX4LPAP0XEyro8zcrCP5CuHH1Q0kaS3gC8GtgMICKeBM7Osf0t//2X3JADG5Y3tcedKKsAyL2WXk3qem11XOGyRn5FKhAOGi5jkaRNgbcBr84DM+8GPkDqJrNbff5cKfss6TL5lLp1T0XElaSxGS+q39bMuq6dhpjhbEP6Uf93YBawUWHdUI04q4CngO2G2nlEnBMRryONnfhX4JN5TAOksQZ/Ip08bEmq0K032D6XbT8Ero6I/7+wak3epmhLGnfLqdd0W0nPIVW4PtrCfsz63UERMTkvBwFrWb+MqNmIdNWoo/L3/xDSFXYi4lfA7axr+Kk1QtXHVB/P1RExmdRQdAlpXGq9hmVhRDxBOic7ELgbWEC60rY8x/g64DN5m2eSKj7fyFflYcPypva4rbKqLm0O8IuIuK2FfY47rnDZBnLr7cdIY60OkrRZblHZX9JncrZnSNqksGxMKgyeBHYh9TvenTTT4M/Jl98lnSjpRZIm5v6//wYsjYj7Jc3OY7u2UrIXqdC4uqtvgJk1MqqGmFblcQkn5WO8G4ZvxImIR3Nc/9jiMZ6IiO+RutnUGnJ2A76Wx6StAb4KHFDbJpdtFwF3Av9St8ub8va1vDsBG5MqcMOp33Zz0niLm4C9SJXI3+fXfAqwV34P3MXaxrvbSY0/k2oJuZfM82jSRa9NbyVVML5SKIemsq5b4QpSxWpa3XY7NoonlzPvBo6Q9PcN1m9QFub0GyPi1RGxdUS8kTTxxa/z6t2B/4mIa3OD9W9I4+JrszGuV97kx/dERKPxpfVaLefm4KtbTbnCZQ1FxOdIs938B3AvaSr3Y1h3efsw0kw0teXPpFbnb0bE7Xlg+t0RcTfwJeDw3DK+GWkMyIPAraQCsjbr2Crgn4FbgIdIrcz/HRHnlPxyzWwYLTbEdMIi4
EOSNqGFRhzSzFrz8hitrQEk7Sbp3Px4nqQDJW0h6RmS9ieNSbgmb/8b4ChJm+YK3nzyeAVJG5HGhz0GzMmD3YvOAd4s6ZW5wvRJ4MLIM6vmhqVNgAnAhNw4VbtC+APgRZL+Mef5GHBjRPyBNIZsWuE1fwy4Htg9dx0yG7ci4nbS9/dESZNyo8gHSVeaig20jRqGazaqWzfUlfu5pJlGZ7DuO/lyYHdJM/J38vvApyVtncvFw0jlVsPxoLmi8w3Sd7uZYlmIpBfnWDeT9O+kRpkzc97fAK+sXdHKFblXsm4M19nAkZJ2yWNV/6OwLTnmTUj1gon5OLXGnSHLubz9y0iVUM9O2ExEePHixYsXLy0tpBmrriXNgHU3aYKblxXWHw98u8V9TSNNCDGxkCZSi+p7gJ8AJzXY7m352BPz871IJzargQdIJ2Nz8rp/IM3mtYrUkLMEmFfY146k7oL3521/AkzP616d43uU1K2mtryysP07SC3uj5CmbZ5S915E3XJ8Yf3rSGMzHgMGgWlN3qd5pK46Pf/8vXjp5gIsI93SoT59B9LJ/d3AfcBPgV0K6xt995YX9lm/7oQmx59KqsjNaLDuMuCz+fFWpArUnbms+SXw8kLeDb7DpEk4/ga8eLiyMD//77zvNbm8e37d/o4hTbLxMKlBe0Hd+mNJ9z19iDTz6caFdWc2eE/mFdY3Lefy+q8B3+r1/0uVF+U3yszMzMzMzDrMXQrNzMzMzMxK4gqXmZmVRtLhktY0WG7qdWxmZgCSPtKknGo4BstspNyl0MzMzMzMrCS+wmVmZmZmZlaSMm5g2THbbLNNTJs2raW8jzzyCJtvvnm5AbWp6jFWPT5wjJ3SaoyLFy++LyKe3YWQxoRGZVLVPu+qxQPVi8nxDK9qMdXicZm0zkjOkXqpav9LneDXNHaU+bpGVB71eprEoZY999wzWnXVVVe1nLdXqh5j1eOLcIyd0mqMwLVRgbKgKkujMqlqn3fV4omoXkyOZ3hVi6kWj8ukocujKqra/1In+DWNHWW+rpGUR+5SaGZmZmZmVpJhK1ySzpC0UtLvCmnHS7pT0g15OaCw7sOSlkr6o6Q3FtL3y2lLJS3s/EsxMzMzMzOrllaucJ0J7Ncg/eSI2D0vlwFI2gU4FNg1b/MVSRMkTQC+DOwP7AIclvOamZmZmZn1rWEnzYiI/5E0rcX9zQbOjYi/AbdJWgrsldctjYhbASSdm/P+fsQRN7HkztXMW3hpp3bHskUHdmxfZu2a1sH/bYAz9+u/gbE2Mp3+nwKXm2ZWHZ0u41y+WTvaGcN1jKQbc5fDrXLaVOCOQp7lOa1ZupnZiDTp5jxF0hWSbsl/t8rpkvSF3JX5Rkl7FLaZm/PfImluL16LmZmZ9b/RTgt/KvApIPLfk4B3AWqQN2hcsWt4x2VJ84H5AAMDAwwODrYU0MCmsGDG2pbytqLV447EmjVrStlvp1Q9Phi/MXbyfxvGxvs4hDOBLwFnF9IWAldGxKI8RnQhcBypG/P0vOxNKrv2ljQF+Dgwk1QWLZZ0SUSs6tqrMLO+JukDwFGkMmYJ8E5gO+BcYApwHXBERDwuaWNSmbYncD/w9ohY1ou4zazzRlXhioh7ao8lfR34UX66HNihkHV74K78uFl6/b5PA04DmDlzZsyaNaulmL54zsWctKRztxVbdnhrxx2JwcFBWn09vVD1+GD8xtjJ7rKQuhRW/X1spkk359nArPz4LGCQVOGaDZydp2+9WtJkSdvlvFdExAMAkq4gjTv9bsnhm9k4IGkq8F5gl4h4TNL5pDHuB5DGwJ8r6avAkaSGoCOBVRHxfEmHAicCb+9R+GbWYaOqoUjaLiJW5KdvBWpdey4BviPpc8BzSK3KvyZd+ZouaUfgTlKh8452AjczKxiolUkRsULStjm97W7Ow111r9rVwlbj6fRVU2jeM2CsvkfdUrV4oHoxVS2eFk0ENpX0BLAZsAJ4LevOf84CjidVuGbnx
wAXAF+SpNxYZGZj3LAVLknfJbUGbyNpOakbzixJu5Muky8D/gUgIm7KrTi/B9YCR0fEk3k/xwA/BSYAZ0TETR1/NWZm62vWzblZ+oaJw1x1r9pV11bj6fRVU2jeM2CsvkfdUrV4oHoxVS2e4UTEnZI+C9wOPAZcDiwGHoyIWmtHsaHn6UagiFgraTWwNXBfcb+jHXbRS72qLHe6Uan4GsZoA8CQ+vE1QXVeVyuzFB7WIPn0IfJ/Gvh0g/TLgMtGFJ2ZWWvuqV15z10GV+b0Zt2cl7OuC2ItfbALcZrZOJAn7pkN7Ag8CHyPNKa0Xq2hp6VGoNEOu+ilXlWWO92oVGxQGmsNAK3ox9cE1Xld7cxSaGZWFZcAtZkG5wIXF9Ln5NkK9wFW566HPwXeIGmrfGL0hpxmZtYJrwNui4h7I+IJ4ELgZcBkSbXG7uJ49qcbh/L6ZwEPdDdkMyuLK1xmNqbkbs6/AnaWtFzSkcAi4PWSbgFen59Duqp+K7AU+DrwboA8WcangN/k5ZO1CTTMzDrgdmAfSZtJErAvabjFVcDBOU9941Ct0ehg4Gcev2XWPzo3rZ+ZWRc06eYM6YSmPm8ARzfZzxnAGR0MzcwMgIi4RtIFpKnf1wLXk7oCXgqcK+mEnFYbonE68C1JS0lXtg7tftRmVhZXuMzMzMw6LCI+TpporOhWYK8Gef8KHNKNuMys+9yl0MzMzMzMrCSucJmZmZmZmZXEFS4zMzMzM7OSuMJlZmZmZmZWEle4zMzMzMzMSuIKl5mZmZmZWUlc4TIzMzMzMyuJK1xmZmZmZmYl8Y2PzczMzKynpi28tNchmJXGV7jMzMzMzMxK4gqXmZmZmZlZSVzhMjMzMzMzK4nHcJmZmZmZDaE4xmzBjLXM68CYs2WLDmx7HzY2+AqXmZmZmZlZSVzhMjMzMzMzK4krXGZmZmZmZiVxhcvMzMzMzKwkrnCZWV+QtLOkGwrLQ5LeL+l4SXcW0g8obPNhSUsl/VHSG3sZv5n1F0mTJV0g6Q+Sbpb0UklTJF0h6Zb8d6ucV5K+kMujGyXt0ev4zaxzXOEys74QEX+MiN0jYndgT+BR4Ad59cm1dRFxGYCkXYBDgV2B/YCvSJrQi9jNrC+dAvwkIl4A7AbcDCwEroyI6cCV+TnA/sD0vMwHTu1+uGZWFle4zKwf7Qv8OSL+MkSe2cC5EfG3iLgNWArs1ZXozKyvSdoSeBVwOkBEPB4RD5LKnbNytrOAg/Lj2cDZkVwNTJa0XZfDNrOS+D5cZtaPDgW+W3h+jKQ5wLXAgohYBUwFri7kWZ7T1iNpPqnFmYGBAQYHB9dbv2bNmg3SeqnVeBbMWNvxYzc77lh9j7qlavFA9WKqWjwt2Am4F/impN2AxcD7gIGIWAEQESskbZvzTwXuKGxfK49WdC9kMyuLK1xm1lckPRN4C/DhnHQq8Ckg8t+TgHcBarB5bJAQcRpwGsDMmTNj1qxZ660fHBykPq2XWo2nEzftrLfs8MbHHavvUbdULR6oXkxVi6cFE4E9gPdExDWSTmFd98FGWiqPhmsAqqJeNgKVZWDTzsRbpc9vDDZqtKQqr8sVLjPrN/sD10XEPQC1vwCSvg78KD9dDuxQ2G574K5uBWlmfW05sDwirsnPLyBVuO6RtF2+urUdsLKQf9jyaLgGoCrqZSNQWRbMWMtJS9o/hW7WSNULY7BRoyVVeV0ew2Vm/eYwCt0J68ZBvBX4XX58CXCopI0l7UgarP7rrkVpZn0rIu4G7pC0c07aF/g9qdyZm9PmAhfnx5cAc/JshfsAq2tdD81s7Bu2wiXpDEkrJf2ukDbiaU0lzc35b5E0t9GxzMzaIWkz4PXAhYXkz0haIulG4DXABwAi4ibgfNJJ0E+AoyPiyS6HbGb96z3AObns2R34L2AR8HpJt5DKqkU572XAraTJe74OvLv74ZpZWVq5Hnom8CXg7EJabVrTRZIW5ufHsf60pnuTxk7sLWkK8HFgJ
qlP8mJJl+SB62ZmHRERjwJb16UdMUT+TwOfLjsuMxt/IuIG0nlPvX0b5A3g6NKDMrOeGPYKV0T8D/BAXfJIpzV9I3BFRDyQK1lXkO57Y2ZmZmZm1rdGO+JvpNOaNkvfwGhn4OnUjDE1ZcxoUpWZUpqpenwwfmPs9OxNY+F9NDMzM+sHnZ6lsNm0pi1Ndwqjn4Hni+dc3JEZY2rKmDmmKjOlNFP1+GD8xtjp2ZvO3G/zyr+PZmZmZv1gtLMU3lOb+avFaU09/bKZmZmZmY07o61wjXRa058Cb5C0VZ7R8A05zczMzMzMrG8N2wdP0neBWcA2kpaTZhtcBJwv6UjgduCQnP0y4ADStKaPAu8EiIgHJH0K+E3O98mIqJ+Iw8zMzMzMrK8MW+GKiMOarBrRtKYRcQZwxoiiMzMzMzMzG8NG26XQzMzMzMzMhuEKl5mZmZmZWUlc4TIzMzMzMyuJK1xmZmZmZmYl6fSNj83MrATTWrz59YIZazt+o2wzMzMbPVe4zMysI5pVCkdbCVy26MB2QzIzM+s5dyk0MzMzMzMriStcZmZmZmZmJXGFy8zMzMzMrCSucJmZmZmZmZXEFS4zMzMzM7OSuMJlZn1D0jJJSyTdIOnanDZF0hWSbsl/t8rpkvQFSUsl3Shpj95Gb2b9RtIESddL+lF+vqOka3J5dJ6kZ+b0jfPzpXn9tF7GbWad5QqXmfWb10TE7hExMz9fCFwZEdOBK/NzgP2B6XmZD5za9UjNrN+9D7i58PxE4ORcHq0CjszpRwKrIuL5wMk5n5n1CVe4zKzfzQbOyo/PAg4qpJ8dydXAZEnb9SJAM+s/krYHDgS+kZ8LeC1wQc5SXx7VyqkLgH1zfjPrA77xsZn1kwAulxTA1yLiNGAgIlYARMQKSdvmvFOBOwrbLs9pK4o7lDSfdAWMgYEBBgcH1zvgmjVrNkgrw4IZa1vKN7Bp63m7ZbQxlfW+dusza1XV4oHqxVS1eFr0eeBDwBb5+dbAgxFR+zLUyhwolEcRsVbS6pz/vu6Fa2ZlcYXLzPrJyyPirlypukLSH4bI26j1ODZISJW20wBmzpwZs2bNWm/94OAg9WllmLfw0pbyLZixlpOWVKtoH21Myw6f1flg6N5n1qqqxQPVi6lq8QxH0puAlRGxWNKsWnKDrNHCuuJ+h2wAqqJWK8tVaygaSqcatqr0+Y3RRo1hVeV1VetX2cysDRFxV/67UtIPgL2AeyRtl69ubQeszNmXAzsUNt8euKurAZtZv3o58BZJBwCbAFuSrnhNljQxX+Uqljm18mi5pInAs4AH6nc6XANQFbVaWW61UakKOtWwVVaj0miMtUaNVlXldbnCZWZ9QdLmwDMi4uH8+A3AJ4FLgLnAovz34rzJJcAxks4F9gZW17oempm1IyI+DHwYIF/h+veIOFzS94CDgXPZsDyaC/wqr/9ZRGxwhcv6y7QOVzKXLTqwo/uzznGFy8z6xQDwgzzOfCLwnYj4iaTfAOdLOhK4HTgk578MOABYCjwKvLP7IZvZOHMccK6kE4DrgdNz+unAtyQtJV3ZOrRH8ZlZCVzhMrO+EBG3Ars1SL8f2LdBegBHdyE0MxvHImIQGMyPbyV1da7P81fWNQaZWZ/xtPBmZmZmZmYlcYXLzMzMzMysJK5wmZmZmZmZlcQVLjMzMzMzs5K4wmVmZmZmZlYSV7jMzMzMzMxK4gqXmZmZmZlZSVzhMjMzMzMzK0lbFS5JyyQtkXSDpGtz2hRJV0i6Jf/dKqdL0hckLZV0o6Q9OvECzMzMzMzMqmpiB/bxmoi4r/B8IXBlRCyStDA/Pw7YH5iel72BU/NfMzOzDUxbeGlH97ds0YEd3Z+ZmVkryuhSOBs4Kz8+CziokH52JFcDkyVtV8LxzczMzMzMKqHdK1wBXK7/x969x8lR1Xkf/3wh3AMEiMSYRIJrdAWzIGQRZNcdBbl5Ca7gBpEkLhhfK+Atq4K6giI+6COi+CAahQUUu
YggeTSAAZnHywpykUu4aYQIQwLILRDwFvg9f5zTpNLpmemZ6equnnzfr1e9pvvUqapf11SfPqPwe2wAACAASURBVKfq1CkpgG9GxAJgQkSsAIiIFZK2z3knAQ8Ulu3LaSuKK5Q0D5gHMGHCBHp7e5sKZMJmMH/66hF8lLU1u92hWLVqVSnrbZWqxwfrb4ytPLahO/ajmZmZ2Wgw0gbX3hGxPDeqFku6e4C8apAW6ySkRtsCgBkzZkRPT09TgXzt/Ms59fZW9JBMlh3e3HaHore3l2Y/TydUPT5Yf2Oc2+KuVeccsEXl96OZmZnZaDCiLoURsTz/fQS4DNgDeLjWVTD/fSRn7wOmFBafDCwfyfbNzMzMzMyqbNgNLklbSNqy9hrYD1gCLATm5GxzgMvz64XA7Dxa4Z7AylrXQzMzMzMzs9FoJH3wJgCXSaqt53sRcaWkG4CLJR0J3A8cmvMvAg4ClgLPAu8ZwbbNzMzMrAOGMoLo/OmrW94t3qzbDLvBFRH3Ars0SH8M2KdBegBHD3d7ZmZmZmZm3aaMYeHNzMzMzMwMN7jMbJSQNEXStZLuknSHpA/m9BMlPSjpljwdVFjmeElLJd0jaf/ORW9mo8kA5dG2khZL+l3+u01Ol6TTc3l0m6TdOvsJzKyV3OAys9FiNTA/Il4F7AkcLWmnPO+0iNg1T4sA8rxZwM7AAcDXJW3YicDNbNTprzw6DrgmIqYB1+T3AAcC0/I0Dziz/SGbWVnc4DKzUSEiVkTEzfn108BdpIer92cmcGFE/CUi7iMN6LNH+ZGa2Wg3QHk0Ezg3ZzsXODi/ngmcF8l1wLjaI3bMrPu17knBZmYVIWkq8BrgemBv4BhJs4EbSWednyBVfq4rLNZHgwaapHmkM85MmDCB3t7eteavWrVqnbQyzJ++uql8EzZrPm+7VCWmr52fnlIyYbM1r0dq+qStR7yOdh1DQ1G1mKoWz1DUlUcTao/EiYgVkrbP2SYBDxQWq5VHaz0+Z7DyqF2G8n2uyve/lar6mUZyPHTzd2wgVflcbnCZ2agiaSzwA+BDEfGUpDOBk4DIf08F/h1Qg8VjnYSIBcACgBkzZkRPT89a83t7e6lPK0OzwyrPn76aU2+vVtFetZhaGc+yw3tGvI52HUNDUbWYqhZPsxqUR/1mbZA25PKoXYYyzHvVvv+tUNXPNJLyqFu/Y4Opyudyl0IzGzUkbUSq3JwfEZcCRMTDEfFcRDwPfIs13Qb7gCmFxScDy9sZr5mNXo3KI+DhWlfB/PeRnO7yyGwUc4PLzEYFpVPHZwF3RcSXC+nF+yDeDizJrxcCsyRtImlH0s3qv25XvGY2evVXHpHKnTn59Rzg8kL67Dxa4Z7AylrXQzPrftW7HmpmNjx7A0cAt0u6Jad9AjhM0q6k7jnLgPcBRMQdki4G7iSNKHZ0RDzX9qjNbDTqrzw6BbhY0pHA/cChed4i4CDS4D3PAu9pb7hmViY3uMxsVIiIX9D4PohFAyxzMnByaUGZ2XppgPIIYJ8G+QM4utSgzKxj3KXQzMzMzMysJG5wmZmZmZmZlcQNLjMzMzMzs5K4wWVmZmZmZlYSD5phZmZmZtblpg7hgdT15k9f3fCB1stOefNIQrLMV7jMzMzMzMxK4gaXmZmZmZlZSdzgMjMzMzMzK4kbXGZmZmZmZiVxg8vMzMzMzKwkHqXQzMxsmEYyKlhNcXQwjwhmZjb6+AqXmZmZmZlZSdzgMjMzMzMzK4kbXGZmZmZmZiVxg8vMzMzMzKwkbnCZmZmZmZmVxKMUmpm1WCtGrjMzM+u0Vv+era8jsbrBZWZmVhFlNNbX1wqOreGTQGad1fYuhZIOkHSPpKWSjmv39s3MalwemVmVuEwyG53aeoVL0obAGcCbgD7gBkkLI+LOdsZhZubyyNYXw7m6UXwYcz1fMSuHyyRbH6yvV/Hb3aVwD2BpRNwLIOlCYCbgwsTM2s3lkZlVicsks2EYqBE30Mmj/pTRg
FNEtHyl/W5MOgQ4ICKOyu+PAF4bEccU8swD5uW3rwTuaXL144FHWxhuGaoeY9XjA8fYKs3GuENEvKjsYDqhmfIopw9WJlXt/121eKB6MTmewVUtplo863WZNII6UidV7VhqBX+m7lHm52q6PGr3FS41SFurxRcRC4AFQ16xdGNEzBhuYO1Q9RirHh84xlbphhjbYNDyCAYvk6q2L6sWD1QvJsczuKrFVLV4SlJaHamTRuP/zp+pe1Tlc7V70Iw+YErh/WRgeZtjMDMDl0dmVi0uk8xGqXY3uG4ApknaUdLGwCxgYZtjMDMDl0dmVi0uk8xGqbZ2KYyI1ZKOAa4CNgTOjog7WrT6brjEXvUYqx4fOMZW6YYYS9XC8qhq+7Jq8UD1YnI8g6taTFWLp+VKriN10mj83/kzdY9KfK62DpphZmZmZma2Pmn7g4/NzMzMzMzWF25wmZmZmZmZlaSrGlySDpB0j6Slko5rMH8TSRfl+ddLmlrBGD8i6U5Jt0m6RtIOVYuxkO8QSSGp7cNpNhOjpHfmfXmHpO9VLUZJL5V0raTf5P/3QW2O72xJj0ha0s98STo9x3+bpN3aGV83aOJ/fJqkW/L0W0lPFuY9V5g34hvfR/L/lDRH0u/yNGeksQwhpsNzLLdJ+h9JuxTmLZN0e94/N7Ypnh5JKwv/l08X5jVVLrY4no8WYlmSj5lt87wy9s+UXCbdlcvNDzbI09bjqMmY2noc2dBV/fdwOEbjb+hIyuwqG+xzFfL9Yy5nD2lXbC+IiK6YSDeQ/h54GbAxcCuwU12e9wPfyK9nARdVMMY3AJvn1/9RxRhzvi2BnwHXATOqFiMwDfgNsE1+v30FY1wA/Ed+vROwrM0xvh7YDVjSz/yDgCtIz37ZE7i+nfFVfWr2u1LIfyzpJvfa+1VV+H8C2wL35r/b5NfbtCmm1xW+owcWjzFgGTC+zfuoB/jRSP/XrYqnLu9bgZ+WvH8mArvl11sCv21QbrX1OGoyprYeR56G/D+s/O/hMD/XqPsNHUmZXeWpmbI2H6c/BRYBh7Q7xm66wrUHsDQi7o2IvwIXAjPr8swEzs2vLwH2kdToQYIdizEiro2IZ/Pb60jP2WinZvYjwEnAF4E/tzO4rJkY3wucERFPAETEIxWMMYCt8uutafPzVCLiZ8DjA2SZCZwXyXXAOEkT2xNdV2j2u1JzGHBBWcGM4P+5P7A4Ih7P35fFwAHtiCki/qf2HaUN5V0T+6g/Q/1flxFPqccPQESsiIib8+ungbuASXXZ2nocNRNTu48jG7LK/x4Ox2j8Da1amd0qTZa1xwI/ANpdXwS6q0vhJOCBwvs+1v2heCFPRKwGVgLbtSW6uu1njWIsOpJ0dqSdBo1R0muAKRHxo3YGVtDMfnwF8ApJv5R0naSWVCCHoJkYTwTeLamPdEbl2PaE1rShHq/rm6b3j1LX4B1JZ89qNpV0Yz4+Dy4vzBf0F29V/s/15V0AP5F0k6R5bYxjL0m3SrpC0s45raP7SNLmpMbLDwrJpe4fpS73rwGur5vVseNogJiKqnIc2Rqj4fdwOKpStpalE3XUUkiaBLwd+EanYmjrc7hGqNGVqvox7ZvJU6amty/p3cAM4F9KjajBphukvRCjpA2A04C57QqogWb24xhSt8Ie0hmYn0t6dUQ8Wb9gSZqJ8TDgnIg4VdJewHdyjM+XH15TOv19qbqh7J9ZwCUR8Vwh7aURsVzSy4CfSro9In7f8ijX6C/ejv+fJb2B9OP9T4XkvfP+2R5YLOnufJayTDcDO0TEqnwPyQ9J5Uin99FbgV9GRPEMbWn7R9JYUuPuQxHxVP3sBouUfhwNElMtT1WOI1vbaPg9HI5Olxul6ee71s2+Anw8Ip5rb8e3NbrpClcfMKXwfjLrXpJ+IY+kMaTL1sPpXjJczcSIpH2BTwJvi4i/tCm2msFi3BJ4NdAraRmpX/JCtXfgjGb/15dHxN8i4j7gHlLFqV2aifFI4GKAi
PgVsCkwvi3RNaep43U9NpT9M4u67mARsTz/vRfoJZ25L1N/8Xb0/yzpH4BvAzMj4rFaemH/PAJcRuqWVKqIeCoiVuXXi4CNJI2n89+FgY6flu4fSRuRGjbnR8SlDbK0/ThqIqZKHUe2jtHwezgcnS43StHfd63LzQAuzPXaQ4Cvt6nnyQu6qcF1AzBN0o6SNib9QNWP/LUQqI2cdAjpBuR2nm0YNMbcXe+bpMZWJ/qRDhhjRKyMiPERMTUippL68L4tIto5+lMz/+sfkgYgIVeYXkG6ibtKMd4P7JNjfBXpB+aPbYxxMAuB2XmkpT2BlRGxotNBVUgz/2MkvZI0iMCvCmnbSNokvx4P7A3cWXK8/f0/rwL2yzFtA+yX00on6aXApcAREfHbQvoWkrasvc4xDTi6VIvieXHtvl5Je5B+Ax+jyf91STFtTerpcHkhrZT9kz/7WcBdEfHlfrK19ThqJqaqHUe2jtHwezgco+43tL/vWreLiB0L9dpLgPdHxA/bGUPXdCmMiNWSjiEV8BuSRgO7Q9JngRsjYiGp0P6OpKWkK1uzKhjj/wbGAt/Pv/v3R8TbKhZjRzUZY+3H/07gOeCj7TwT02SM84FvSfowqZvB3HaeAJB0AanL5fjcb/4EYKMc/zdI/egPApYCzwLvaVds3WAI35XDgAvr/revAr4p6XlSpf6UiBhRg2u4/8+IeFzSSaRKEcBn67qulRnTp0n30X49l3erI2IGMAG4LKeNAb4XEVe2IZ5DgP+QtBr4EzAr/98a/q/bEA+k+wp+EhHPFBYtZf+QGv5HALdLuiWnfQJ4aSGmdh9HzcTU1uPIhqYbfg+HYzT+ho6gzK60JsvajlLFj3czMzMzM7Ou1U1dCs3MzMzMzLqKG1xmZmZmZmYlcYPLzMzMzMysJG5wmZnZekvSMqVHdZiZmZXCDS4bNkmfkPTtAea/UJEZLG+L45oqKZSexWZmHZC//3+StKowvSuPIFWft1fSUfn1iZK+O8A6h9Q4krSVpK9Iuj/HsDS/7/ZnAJmZWZdwg6uDqlIhycvtKOl5SV9vdpmI+HxEHNXKvJLOyY2lt9WlfyWnz202vsKyvZL+nPfvo5IulTRxqOsxsyF7a0SMrU20+aGg+ZlA1wA7AwcAWwGvIz17q6UPyM3P4vFvqlmLVKGOJKkn141q2++TdLGkf2yQV5LuVXpcTTH9ZEnX1KW9QtJTkqZLmpvrN1+uy3NwTj8nv//nun2xKs9/R2H7n5P0oKSVeZ/sXFjfJpLOztt9SNJHCvP2lLRY0uOS/ijp+8V6Ul73FyQ9lqcvSmnc+Dx/gaR78r6a2+z+XZ/4x6HzOlohKZgNPAHMUn5gawf9ljUPsEbpStWhwO9HsM5j8v59BTAOOG2oK5CvmJl1m9mk5zm9PSLujIjnI+KRiDgpIhYV8u0q6bZcSblI0qbwwgOsf5QrIE/k15NrC+UKzcmSfkl6Ds/L8smrn0l6WtLVks4oVv5yxeZ/JD0p6VZJPe3ZFWZdqQp1pOV521sCewJ3Az+XtE9dvtcD25PKgWKD7LPAiyW9F1542Pe3gC9HxO05z++Bf6urZ8wm1YcAiIif1+2LtwCrgNqz5w4F/h34Z2Bb4FfAdwrrOxGYBuwAvAH4mKQD8rxtgAXA1Dz/aeC/C8vOAw4GdgH+IW/7fYX5twLvB27GGnKDy2pmA58C/ga8tThD0s6FMx8PS/pETj+xriJxhKQ/5LMfn6xbR33efypUOh6oOyPyf4G9JW2T3x8A3AY8VFh+A0mfytt7RNJ5krYe7EPmB3X+AHh1Xs8mkr6k1N3oYUnfkLRZntejdDbr45IeIhc+kmZKuiWfJfp9ocAys2rZF7gyIlYNku+dpHJmR1JlYm5O34D0vd+B1HD7E/B/6pY9glQZ2RL4A/A94Nekh4eemOcDIGkS8GPgc6QK0X8CP5D0ouF8ODNrn0j6IuLTwLeBL9RlmQNcTnog8pzCcn8hNYROyWXAP
FID5+TCsg8BtwP7A0jalnQ1fuEAIc0BLik8NH1H4BcRcW9EPAd8F9ipkH82cFJEPBERd5EafXNzjFdExPcj4qmIeJZUzu1dt61T8+d/EDiVNeUkEXFGRFwD/HmAeNdrbnAZkv4ZmAxcCFxM+lLW5m0JXE06g/IS4OWkLjr169gJOJNUuXgJqbIxuT5fzvtS4Arga8CLgF2BWwpZ/kwqZGbl97OB8+pWMzdPbwBeBoxl3YpQo22PB94B/CYnfYF01WvX/NkmkZ60XvNiUsVoB2CepD1yLB8lXSl7PbBssO2arad+mE+qPCnphx3Y/nbAiibynR4Ry/MJmf9LKg+IiMci4gcR8WxEPE2qIP1L3bLnRMQdEbEamAj8I/DpiPhrRPyCtStM7wYWRcSifLVtMXAjcNCIPqWZtdulwG6StgCQtDlwCHB+nmYpdWkGICKuB84h1R9OBv49Iv5Wt87zWFP/mkVqvP2l0cYL2zu3kHwh8HKl7oobkRpJV+b825DqZrcW8t9K6m7dyOuBOwrvdx7CstaAG1yd1+kKCaQv5RUR8QTp7OyBkrbP894CPBQRp0bEnyPi6Vxw1DsE+FFE/Cyfzfkv4Pl+tnc4cHVEXBARf8uVmlvq8pwHzM5Xrf4FqN83h5Mux9+bz14fTyrg+uv2d7qkJ0mFxArgI/my/nuBD0fE47lC9XnWNPTIn+GEiPhLRPwJOBI4OyIW5wrTgxFxdz/bNFvfHRwR4/J0MLAa2KhBvo1IV9db7TFSI2gwDxVeP0s6gYOkzSV9M19Jfwr4GTBO0oaF/A8UXr8EeDyfIW40fwfg0EKZ/yTwT03GaLY+qkIdqZHlgEgnXgH+ldQ4+gnwI2AM8Oa6ZT5FOrH7nYi4scE6LwN6cr2n0YnmoncAjwL/r5C2Avg5cA/pavyhwIfzvLH578pC/pWkK/NrkfQPpBPPHy0kj22w7NjifVw2MDe4Oq+jFZLcfe5Q0hkZIuJXwP3Au3KWKTR379RLKFQs8iXux/rJO+g685nhF5EKqB/lxk799v5QeP8HUgE3oZ9VfiDv40kRcXhE/DGvf3PgpkLl58qcXvPHiCheIm92f5jZuu4Hxkuq/fjX7mfYgbW/z61yNbB/7Sz0MMwHXgm8NiK2Ip31hVTRqonC6xXAtvnsc82UwusHSJWtcYVpi4g4ZZjxmY12nT5p059JpO/+k/n9HODiiFidTzpfSqFbIUCux9zH2leO6uf/mFTvGR8Rvxxg+3OA8yKiWP6cQLrCPgXYFPgM8NNcHtW6VW9VyL8V6V6tF0h6OakH0gcj4ueFWasaLLuqbvs2ADe4qqfdFZK3k744X1cateYhUkFSu6z9APB3TaxnBYWKRf6Cb9dP3mbX+V1ShafRWZ7lpH1S81JSQfxwE+uteZR0FmjnQoG+db4Ztaa+MGk2djOrExH3A9cDX5A0VmmAno+SvrvXFbJuIGnTwlQcyGejunkDDWbzHdJ39geS/j7f+7md0mMqmunGtyWpjHgy31NxwiCf7w+kLoInStpY0l6sfU/sd4G3Stpf0oY5/h4VBuIwswG1u47Un7cDN0fEM/n7+0bg3YV61CHAQRr64yfOI9V7vtNfBklTgB7WrRvtAlyU77NaHRHnkO4V2yn3YFqR8xTzv9D4k7QD6STVSRFRv/07BlrWBucGV8V0oEIyBzgbmE66b2FX0o2Su0qaTro0/mJJH8oDTGwp6bUN1nMJ8BalwTA2Jo3K09/xdT6wr6R3ShqTK0C7Nsh3OvAmUjeeehcAH1YaEWwsqSvgRfk+iqZExPOkm0ZPq3WhlDRJ0v4DLHYW8B5J++TK2yRJf9/sNs2MfyON5LUUeBDYBzio7kryYaSGTm0qXlVeVDfvxP42lM8070saVWwx8BRpQIvxpHJ2MF8BNiOdnLmONaOBDeRwYC/SFf7PAReR78OIiAeAmcAngD+SGoMfxb/FZk3pQB3pBUomSToBOIr0PYZ07/pvSVfDa/WoVwB9p
LJsKP4fqd7ztQHyHAH8T0TU97a5gdRleUKunxxBuvK3NM8/D/iU0uirf0+6peKc/NkmAT8FzoiIbzTY5nmkWzEmSXoJqVF4Tm1mPsG0Kenqf23/ulwrighPHZpIgy3s2yB9CvB90n0FjwJXkc5Q1OafSLryUpz6Cuusn/e5frY/iVRITW8wbxHwpfz61aSBMp7IMR1XiOO7hWXmkM4+PQZ8svj5GuT9Z1Kh+RSp0jEnp58zQLy/AObm1xuQ+hg/QKq4fBfYJs+bmj/3mPy+Fziqn3VuSmqs3ZtjuYvU/RDSGaS+Bsu8nTRq4tOkgmz/Th9Lnjx5quZEanB9ptNxePLUbVOn60g5fw/pXu5VwDOk3jWXAHsW8twNHNtg2Y8BN9alrVMfIQ0A9ot+tv850sA81G3vyAZ5NwXOIF3Jeoo0RPsBhfmbkE6wP0XqDfSRwrwT8r5YVZwK8wV8EXg8T18EVPe56vdrT6ePoSpNyjvKzMzMRkjp+TuPk+7V2I804M9eEfGbARc0M7NRy5f7zKyrSDpb6dlrSwppJ0p6UOn5aLcU78+RdLykpZLuKXYXlXRATlsq6bh2fw5rjXw/1qoG0xUdCunFpLO9q0jdov/DjS0zs/Wbr3CtB5QeVPyJBrN+HhEHtjses5GQ9HpSZfa8iKg9wPpEUveHL9Xl3Yl0v98epJEtryb1rYfU5/5NpH72NwCHRcSd7fgMZmZWDa4jWTs0daOgdbeI+DzpPiWzrhcRP5M0tcnsM4ELIw2ecJ+kpaTGF8DSiLgXQNKFOa8bXGZm6xHXkawdKt3gGj9+fEydOrWpvM888wxbbDHcR620XzfF202xguNtpZtuuunRiHjR4Dkr4RhJs0nDcs+PNAzuJNYeuaovp8HaD6TtAxqNvomkecA8gM0222z3KVOmNMrWMc8//zwbbNA9vcO7LV7ovpi7LV5oPubf/va33VQmlWr8+PHxohe9qDK/H1X6LatKLFWJAxxLGXEMqY7U6VE7Bpp23333aNa1117bdN4q6KZ4uynWCMfbStSNsFSViTQS5ZLC+wnAhqT7Uk8Gzs7pZwDvLuQ7C3gH6WHf3y6kHwF8bbDtDqVMapcqHz+NdFu8Ed0Xc7fFG9F8zFUtkzox7b777pX6XzuWdVUljgjH0shI4xhKeVTpK1xmZs2IiBceeC3pW6Tnx0G6clW8JDWZNKwvA6SbmZmZtUx39TkwM2tA0sTC27cDtREMFwKz8kO7dwSmkR58ewMwLT84e2NgVs5rZmZm1lK+wmVmXUXSBaSHUY6X1Ed6YGOPpF1JD1tcBrwPICLukHQxaTCM1cDREfFcXs8xpAdmbkjqgnhHmz+KmZmZrQfc4DKzrhIRhzVIPmuA/CeT7uuqT18ELGphaGZmZmbrGDUNrtsfXMnc437csvUtO+XNLVuXmdn6YGoTZfD86aubLqtdDpvZaNZMmTkULjOry/dwmZmZmZmZlcQNLjMzMzMzs5K4wWVmZmZmZlYSN7jMzMzMzMxK4gaXmZmZWYtJ+rCkOyQtkXSBpE3zs/+ul/Q7SRfl5wCSnxV4kaSlef7UzkZvZq00aINL0tmSHpG0pJC2raTFucBYLGmbnC5Jp+cC4zZJuxWWmZPz/07SnHI+jpmZmVlnSZoEfACYERGvJj3vbxbwBeC0iJgGPAEcmRc5EngiIl4OnJbzmdko0cwVrnOAA+rSjgOuyQXGNfk9wIHAtDzNA86E1EAjPZz0tcAewAm1RpqZmZnZKDQG2EzSGGBzYAXwRuCSPP9c4OD8emZ+T56/jyS1MVYzK9GgDa6I+BnweF1ysWCoLzDOi+Q6YJykicD+wOKIeDwingAWs24jzszMzKzrRcSDwJeA+0kNrZXATcCTEbE6Z+sDJuXXk4AH8rKrc/7t2hmzmZVnuA8+nhARKwAiYoWk7XP6CwVGVitM+ks3MzMzG1VyL56ZwI7Ak8D3Sb2A6kVtkQHmFdc7j9SDiAkTJrBq1Sp6e3tbE
fKIOZahxzF/+up+5w3HQNuqyj6B6sTSzjiG2+DqT38FRlMFCaxbmDS7IyZs1toDt+x/QFUOtmZ0U6zgeM3MrOP2Be6LiD8CSLoUeB2p58+YfBVrMrA85+8DpgB9uQvi1qzbu4iIWAAsAJgxY0aMHTuWnp6esj9LU3p7ex3LEOOYe9yPW7q9ZYf3v62q7BOoTiztjGO4Da6HJU3MV7cmAo/k9FqBUVMrTPqAnrr03kYrri9Mmt0RXzv/ck69vXXtx4EO2laoysHWjG6KFRyvmZl13P3AnpI2B/4E7APcCFwLHAJcCMwBLs/5F+b3v8rzfxoRDU9Mm1n3Ge6w8LWCAdYtMGbn0Qr3BFbmrodXAftJ2iZfZt8vp5mZmZmNKhFxPWnwi5uB20n1rQXAx4GPSFpKukfrrLzIWcB2Of0jrBmMzMxGgUEvCUm6gHR1arykPtJog6cAF0s6knQW59CcfRFwELAUeBZ4D0BEPC7pJOCGnO+zEbHOpXIzMzOz0SAiTiDVmYruJY3WXJ/3z6ypS5nZKDNogysiDutn1j4N8gZwdD/rORs4e0jRmZmZmZmZdbHhdik0MzMzMzOzQbjBZWZmZmZmVhI3uMzMzMzMzEriBpeZmZmZmVlJ3OAyMzMzMzMriRtcZmZmZmZmJXGDy8zMzMzMrCRucJmZmZmZmZXEDS4zMzMzM7OSuMFlZmZmZmZWkjGdDsDMzMzMrMqmHvfjIS8zf/pq5g5jORt9fIXLzMzMzMysJG5wmZmZmZmZlcQNLjMzMzMzs5K4wWVmZmZmZlYSN7jMrKtIOlvSI5KWFNK2lbRY0u/y321yuiSdLmmppNsk7VZYZk7O/ztJczrxWczMzGz0c4PLzLrNOcABdWnHAddExDTgmvwe4EBgWp7mAWdCaqABJwCvBfYATqg10szMWkHSOEmXSLpb0l2S9hrOySEz637DHhZe0iuBiwpJLwM+DYwD3gv8Mad/IiIW5WWOB44EngM+EBFXDXf7ZrZ+ioifULU35wAAIABJREFUSZpalzwT6MmvzwV6gY/n9PMiIoDrcgVoYs67OCIeB5C0mNSIu6Dk8G0IhjMM80CWnfLmlq7PbBBfBa6MiEMkbQxsDnyCdHLoFEnHkU4OfZy1Tw69lnRy6LWdCdvMWm3YDa6IuAfYFUDShsCDwGXAe4DTIuJLxfySdgJmATsDLwGulvSKiHhuuDGYmWUTImIFQESskLR9Tp8EPFDI15fT+ks3MxsxSVsBrwfmAkTEX4G/ShrSyaFauWbWjIFOUg33mWA+UdUarXrw8T7A7yPiD5L6yzMTuDAi/gLcJ2kpqSvPr1oUg5lZvUYFUgyQvu4KpHmk7ohMmDCB3t7elgXXCqtWrapMTPOnrx40z4TNmstXhuHupyrt42Z0W7zQnTEP4mWknj7/LWkX4Cbggwz95JAbXGajQKsaXLNYuyvOMZJmAzcC8yPiCVLBcV0hT8MzysOt3LT6R7zsgr+bfly6KVZwvOuph2tng3OXwUdyeh8wpZBvMrA8p/fUpfc2WnFELAAWAMyYMSN6enoaZeuY3t5eqhJTM2dP509fzam3t+qnZ2iWHd4zrOWqtI+b0W3xQnfGPIgxwG7AsRFxvaSvsube0kaaOglUX0eq0u/HaI9lOHXMTp5gqjfcWMr4n1blWGlnHCP+1cv9kt8GHJ+TzgROIhUUJwGnAv9Ok4XJcCs3Xzv/8tb+iN/+TOvWlRUvy3bTj0s3xQqOdz21EJgDnJL/Xl5IP0bShaT7IVbmRtlVwOcLA2Xsx5oyzMxspPqAvoi4Pr+/hNTgGurJobXU15HGjh1bmd+PKv2WlRHLcLrjdfIEU73hxjLcE1UDqcqx0s44WjFK4YHAzRHxMEBEPBwRz0XE88C3SN0GocnCxMxsIJIuIHVFfqWkPklHkhpab5L0O+BN+T3AIuBeYCmpPHo/QB4s4yTghjx9tjaAhpnZSEXEQ8ADeYAxSLde3Mmak0Ow7smh2Xm0w
j3JJ4faGbOZlacVze7DKHQnrLvJ8+1A7Vk5C4HvSfoyadCMacCvW7B9M1uPRMRh/czap0HeAI7uZz1nA2e3MDQzs6JjgfNzT6B7SYOKbQBcnE8U3Q8cmvMuAg4inRx6Nuc1s1FiRA0uSZuTzia/r5D8RUm7kroLLqvNi4g7JF1MOsOzGjjaIxSamZnZaBQRtwAzGswa0skhM+t+I2pwRcSzwHZ1aUcMkP9k4OSRbNPMzMzMzKxbtOIeLjMzMzMzM2vADS4zMzMzM7OSuMFlZmZmZmZWEje4zMzMzMzMSuIGl5mZmZmZWUnc4DIzMzMzMyuJG1xmZmZmZmYlcYPLzMzMzMysJG5wmZmZmZmZlcQNLjMzMzMzs5K4wWVmZmZmZlYSN7jMzMzMzMxK4gaXmZmZmZlZSdzgMjMzMzMzK4kbXGZmZmZmZiVxg8vMzMzMzKwkY0aysKRlwNPAc8DqiJghaVvgImAqsAx4Z0Q8IUnAV4GDgGeBuRFx80i2322mHvfjF17Pn76auYX3w7HslDePNCQzMzMriaQNgRuBByPiLZJ2BC4EtgVuBo6IiL9K2gQ4D9gdeAz4t4hY1qGwzazFWnGF6w0RsWtEzMjvjwOuiYhpwDX5PcCBwLQ8zQPObMG2zczMzKrqg8BdhfdfAE7LdaQngCNz+pHAExHxcuC0nM/MRokyuhTOBM7Nr88FDi6knxfJdcA4SRNL2L6ZmZlZR0maDLwZ+HZ+L+CNwCU5S30dqVZ3ugTYJ+c3s1FgpA2uAH4i6SZJ83LahIhYAZD/bp/TJwEPFJbty2lmZmZmo81XgI8Bz+f32wFPRsTq/L5YD3qhjpTnr8z5zWwUGNE9XMDeEbFc0vbAYkl3D5C30ZmaWCdTarjNA5gwYQK9vb1NBTJhs3RfVLdoRbzN7puRWrVqVdu21QqO18zMOknSW4BHIuImST215AZZo4l5xfWuVUeq0u/HaI9lOHW2KtVNhxtLGf/Tqhwr7YxjRA2uiFie/z4i6TJgD+BhSRMjYkXuMvhIzt4HTCksPhlY3mCdC4AFADNmzIienp6mYvna+Zdz6u0jbT+2z/zpq0cc77LDe1oTzCB6e3tp9v9QBY7XzMw6bG/gbZIOAjYFtiJd8RonaUy+ilWsB9XqSH2SxgBbA4/Xr7S+jjR27NjK/H5U6best7eXuVc+0+K1Dr3O1oq6XqsMN5Yy6ppVOVbaGcewuxRK2kLSlrXXwH7AEmAhMCdnmwNcnl8vBGYr2RNYWet6aGZmZjZaRMTxETE5IqYCs4CfRsThwLXAITlbfR2pVnc6JOdf5wqXmXWnkTS7JwCX5Xs6xwDfi4grJd0AXCzpSOB+4NCcfxFpSPilpGHh3zOCbZuZmZl1m48DF0r6HPAb4KycfhbwHUlLSVe2ZnUoPjMrwbAbXBFxL7BLg/THgH0apAdw9HC3Z2ZmZtZtIqIX6M2v7yXdflGf58+sOUFtZqNMGcPCm5mZmZmZGW5wmZmZmZmZlcYNLjMzMzMzs5K4wWVmo4akZZJul3SLpBtz2raSFkv6Xf67TU6XpNMlLZV0m6TdOhu9mZmZjUZucJnZaPOGiNg1Imbk98cB10TENOCa/B7gQGBanuYBZ7Y9UjMzMxv13OAys9FuJnBufn0ucHAh/bxIriM9kHRiJwI0MzOz0csNLjMbTQL4iaSbJM3LaRNqD1nPf7fP6ZOABwrL9uU0MzMzs5YZyYOPzcyqZu+IWC5pe2CxpLsHyKsGabFOptRwmwcwYcIEent7WxJoq6xataoyMc2fvnrQPBM2ay5fGYa7n6q0j5vRbfFCd8ZsZtYsN7jMbNSIiOX57yOSLiM9YPRhSRMjYkXuMvhIzt4HTCksPhlY3mCdC4AFADNmzIienp4SP8HQ9fb2UpWY5h7340HzzJ++mlNv78xPz7LDe4a1XJX2cTO6LV7ozpjNzJrlLoVmNipI2kLSlrXXwH7AEmAhMCdnmwNcnl8vBGbn0Qr3BFbWu
h6amZmZtYqvcJnZaDEBuEwSpLLtexFxpaQbgIslHQncDxya8y8CDgKWAs8C72l/yGZmZjbaucHVxaY20X1nKJad8uaWrs+snSLiXmCXBumPAfs0SA/g6DaEZmZmZusxdyk0MzMzMzMriRtcZmZmZmZmJXGDy8zMzMzMrCRucJmZmZmZmZXEDS4zMzMzM7OSDHuUQklTgPOAFwPPAwsi4quSTgTeC/wxZ/1ERCzKyxwPHAk8B3wgIq4aQexmZmZmlTNAHWlb4CJgKrAMeGdEPKH0PIuvkh5V8SwwNyJu7kTsZkUeEbs1RnKFazUwPyJeBewJHC1ppzzvtIjYNU+1xtZOwCxgZ+AA4OuSNhzB9s3MzMyqqL860nHANRExDbgmvwc4EJiWp3nAme0P2czKMuwGV0SsqJ19iYingbuASQMsMhO4MCL+EhH3kR42usdwt29mZmZWRQPUkWYC5+Zs5wIH59czgfMiuQ4YJ2lim8M2s5K05MHHkqYCrwGuB/YGjpE0G7iRdIbnCVJBc11hsT4aNNAkzSOd3WHChAn09vY2FcOEzWD+9NXD/gztVsV4+9vXq1atavr/UAWO18zMqqKujjQhIlZAapRJ2j5nmwQ8UFisVkdaUbeutepIVfr9qFos86c/1+kwKlXXq0osvb29lTlW2hnHiBtcksYCPwA+FBFPSToTOAmI/PdU4N8BNVg81kmIWAAsAJgxY0b09PQ0FcfXzr+cU29vSfuxLeZPX125eJcd3tMwvbe3l2b/D1XgeM3MrAoa1JH6zdogbdA60tixYyvz+1Gl37Le3l5O/cUznQ6jUnW9qsSy7PCeyhwr7YxjRKMUStqIVJCcHxGXAkTEwxHxXEQ8D3yLNd0G+4AphcUnA8tHsn0zMzOzKmpURwIernUVzH8fyemuI5mNYsNucOURdc4C7oqILxfSi32O3w4sya8XArMkbSJpR9KNob8e7vbNzMzMqqi/OhKpLjQnv54DXF5In61kT2BlreuhmXW/kVxb3Bs4Arhd0i057RPAYZJ2JV0KXwa8DyAi7pB0MXAnafSeoyOi8x1szczMzFqrvzrSKcDFko4E7gcOzfMWkYaEX0oaFv497Q3XzMo07AZXRPyCxn2OFw2wzMnAycPdppmZmVnVDVBHAtinQf4Aji41KDPrmM7fPWeV0d/D7eZPX83cYT74bn19wJ1Z1bX6YZbdYLifeaAy0GWcmZkNZkSDZpiZmZmZmVn/3OAyMzMzMzMribsUmpmZmVlHtbKbc3rAr6u4Vh2+wmVmZmZmZlYSN7jMzMzMzMxK4gaXmZmZmZlZSdzgMjMzMzMzK4kbXGZmZmZmZiVxg8vMzMzMzKwkbnCZmZmZmZmVxA8pMDMzMzOz0k097sfMn76auS187tqyU97csnWVxQ0uK1UrH2QI3fGlMjMzMzOrcZdCMzMzMzOzkrjBZWZmZmZmVhI3uMzMzMzMzErS9nu4JB0AfBXYEPh2RJzS7hjMzMDlkZlVS7eUSY3uz271QAhmo0lbG1ySNgTOAN4E9AE3SFoYEXe2Mw7rXs0MwjHUQt8DcayfXB5ZK3hgIGsVl0lmo1e7r3DtASyNiHsBJF0IzARcmJhZu3VVedRfxd5nlc1GjdLKpFafGDCzoWl3g2sS8EDhfR/w2jbHYLaWTv8QdaLC7LPogMsjq6BWlEf1ZYq/713DZZLZMAy33Oyv/lVGmamIaPlK+92YdCiwf0Qcld8fAewREccW8swD5uW3rwTuaXL144FHWxhu2bop3m6KFRxvK+0QES/qdBBlaKY8yunDLZPapcrHTyPdFi90X8zdFi80H/N6XSY1KI8eozr/6yodd1WJpSpxgGNpZKRxNF0etfsKVx8wpfB+MrC8mCEiFgALhrpiSTdGxIyRhdc+3RRvN8UKjteaNmh5BMMvk9ql246fbosXui/mbosXujPmEgy5jlSl/eZYqhsHOJZOx9HuYeFvAKZJ2lHSxsAsYGGbYzAzA5dHZ
lYtLpPMRqm2XuGKiNWSjgGuIg15enZE3NHOGMzMwOWRmVWLyySz0avtz+GKiEXAohJWXdkuP/3opni7KVZwvNakEsujduq246fb4oXui7nb4oXujLnlhlEmVWm/OZZ1VSUOcCyNtC2Otg6aYWZmZmZmtj5p9z1cZmZmZmZm642ub3BJOkDSPZKWSjqu0/HUk3S2pEckLSmkbStpsaTf5b/bdDLGIklTJF0r6S5Jd0j6YE6vZMySNpX0a0m35ng/k9N3lHR9jveifANyJUjaUNJvJP0ov69srFZNkg7Nx/vzkmbUzTs+l4f3SNq/UzE2UvXyGlxml60by+wqqcrvh6Rlkm6XdIukG3NaR445SeMkXSLp7vw92KsTsUh6Zd4ftekpSR/qUCwfzt+vJZIuyN+7Th0rH8xx3CHpQzmtLftkKOW5ktPz79NtknZrZSxd3eCStCFwBnAgsBNwmKSdOhvVOs4BDqhLOw64JiKmAdfk91WxGpgfEa8C9gSOzvu0qjH/BXhjROwC7AocIGlP4AvAaTneJ4AjOxhjvQ8CdxXeVzlWq6YlwL8CPysm5u/qLGBnUrnz9VxOdlyXlNfgMrts3VhmV0mVfj/eEBG7FobV7tQx91Xgyoj4e2AX0v5peywRcU/eH7sCuwPPApe1OxZJk4APADMi4tWkAVhm0YFjRdKrgfcCe5D+N2+RNI327ZNzaL48PxCYlqd5wJktjSQiunYC9gKuKrw/Hji+03E1iHMqsKTw/h5gYn49Ebin0zEOEPvlwJu6IWZgc+Bm4LWkB9mNaXScdDjGyfkL/kbgR4CqGqun6k9AL+lHtfZ+rTKQNNrZXp2OM8fSFeV1js1ldntirXyZXaWpSr8fwDJgfF1a2485YCvgPvKYBJ2MpW77+wG/7EQswCTgAWBb0uB4PwL278SxAhwKfLvw/r+Aj7VznzRbngPfBA5rlK8VU1df4WLNQVXTl9OqbkJErADIf7fvcDwNSZoKvAa4ngrHnLtY3AI8AiwGfg88GRGrc5YqHRdfIRU2z+f321HdWK37VLlMrHJsg6ls+VfkMnvUq9LvRwA/kXSTpHk5rRPH3MuAPwL/nbtaflvSFh2KpWgWcEF+3dZYIuJB4EvA/cAKYCVwE505VpYAr5e0naTNgYNID/fu5P+nv22X+hvV7Q0uNUjzsIstIGks8APgQxHxVKfjGUhEPBfpEv5k0mXrVzXK1t6o1iXpLcAjEXFTMblB1o7Hap0n6erc771+mjnQYg3SqnI8VTm2rucye3Sr4O/H3hGxG6kb1tGSXt+m7dYbA+wGnBkRrwGeocPdZ/O9UW8Dvt+h7W8DzAR2BF4CbEH6P9Ur/ViJiLtIXRkXA1cCt5K6QVdRqd+ntj+Hq8X6SC3lmsnA8g7FMhQPS5oYESskTSSd5asMSRuRfrjPj4hLc3KlYwaIiCcl9ZLuYxgnaUw+m1OV42Jv4G2SDgI2JXWF+ArVjNU6LCL2HcZiVS4TqxzbYCpd/rnMXi9U6vcjIpbnv49IuozUcO7EMdcH9EXE9fn9JaQGVyeP/wOBmyPi4fy+3bHsC9wXEX8EkHQp8Do6d6ycBZyVY/k86X/Wyf9Pf9su9Teq269w3QBMyyOvbEy6hLuwwzE1YyEwJ7+eQ+pzXwmSRPpi3BURXy7MqmTMkl4kaVx+vRmpoLkLuBY4JGerRLwRcXxETI6IqaRj9acRcTgVjNW61kJglqRNJO1Iuvn31x2OqaZby2uoaPkHLrPXF1X6/ZC0haQta69J9ystoQPHXEQ8BDwg6ZU5aR/gzk7EUnAYa7oT0oFY7gf2lLR5Lh9q+6Qj3zFJ2+e/LyUN9nQBnf3/9LfthcDsPFrhnsDKWtfDlijrJrV2TaT+oL8l9QH/ZKfjaRDfBaQ+tH8jtZ6PJPW7vgb4Xf67bafjLMT7T6RLqLcBt+TpoKrGDPwD8Jsc7xLg0zn9ZaSK5lLSZf1NOh1rXdw9wI+6IVZP1ZuAt+fy5C/Aw
6w9GMUnc3l4D3Bgp2Oti7vS5XWO0WV2ufF2ZZldpanTvx95m7fm6Y7ad7lTxxxptMsb8zH1Q2CbDsayOfAYsHUhre2xAJ8B7s7fse8Am3TqOwb8nNTguxXYp537ZCjlOalL4Rn59+l2CgNStWJS3oiZmZmZmZm1WLd3KTQzMzMzM6ssN7jMzMzMzMxK4gaXmZmZmZlZSdzgMjMzMzMzK4kbXGZmZmZmZiVxg8vMzMzMzKwkbnB1mKRlkv4kaVVhepekvgZ5eyUdlV+fKOm7A6xz3yHGsaWkL+dln5F0v6RLJO1RyBN5XjHWjxXm7yRpoaSVkp6WdK2k1xXmT83rqC27TNJxdXFsK+myvJ0/SHpXYd4bJN0u6UlJj+V8kwrzN5F0tqSnJD0k6SOFeRvnz7Msx9BTt11J+kJe72OSvpgfGFibv0DSPZKelzR3KPvWrBtUqCwaJ+nM/B1+Nn/n39Mg39w879mc98zaA3ULcf2t8FnukvSOwvyeXBZcWrfeXXJ6byFtV0k/z2Vbn6RPF+bVl2urJP1XYX5p5VIh35y8/FFD2ddm67P8vXtY6eHNtbSjJN1b932ur/v88wDrPEfSX+uWvzXPa1RW3FpYdqKksyStUKpD3S3pM7X4JJ2Uy7zVkk6s264kfVKp7vaUpAslbTVIXBu2cHfaINzgqoa3RsTY2gQsb+fGJW0C/BSYDrwF2Ap4FXAh6QGaRbsUY42IL+Z1/B3wS9LD4nYEXgJcBvxE0l516xiXP+chwH9JelNh3hnAX4EJwOHAmZJ2zvPuBPaPiHF5/b8DziwseyIwDdgBeAPwMUkHFOb/Ang38FCD3TAPOBjYhfRgzrcA7yvMvxV4P3Bzg2XNRotOl0UbA1eTvsN7AVsDHwVOqWuozAe+kOdtDeyZl1mc11FzUeGzfAj4rqQJhfl/BF4nabtC2hzSw5mLvgf8DNgW+BfgPyS9rS7PuMK+O6mQfiLllUtI2gY4nvQAWjMbmjHAB+vS7q8rB2Htus/PB1nnF+vqSbvUzR9XP0/StsCvgM2AvSJiS+BNwDjg7/JyS4GPAT9usM3ZwBHA3qT60WbA1waJ67lBPoe1kBtcBulLOhk4OCKWRMRzEfFMRFwSESc2uY4TgV9FxCcj4vGIeDoiTic94fwLjRaIiBtJlYRdAfJZnHcA/xURqyLiF8DCHB8R8XBEFCuAzwEvL7yfDZwUEU9ExF3At4C5edm/RsRX8jobFTJzgFMjoi8iHgROrS2blz8jIq4B/tzk/jCzoTsCeClwaETcFxF/i4grgQ8An5W0VT5r+xng2Ii4MudZBryT1Kh5d6MVR8RVwNOsqbxAOrnzQ2AWQD7j+07g/LrFpwLn57Lx96RG0s40p7RyKftfwOnAo03GY2Zr/G/gP4tXxzvkI6Ty6d25PCMiHoiID0bEbfn9uRFxRc5X763AWXmZVaR6179J2rw94dtg3OAygH2BqyLimRGs403A9xukXwzs3ehLL2lP4NWkszYArwCei4ji2eVbKVRsJL1U0pPAn4D/BGpX2LYhndW5tb9lB7HzCJY1s9Z4E3BFg7LoB8CmpKter8uv1+oKmCsZV+R1rCV3t3kzsDHpSnnReaRGEcD+pJNA9Vf2vgLMlrSRpFfmOK6uy/OH3N3wvyWNz9sttVxS6vI9A/hGk+szs7XdCPSS6hOdtC9waUQ8P8zllafi+01IV9dr3i/pcUk3qdC92trDDa5q+KHSfUlPSvphB7Y/nkJ3FqX7FZ7M/YDvqct7cyHWJyXtX1jHigbrXkE6zrYppD0q6U+ky+dfJ51hBhgLrKxbfiWwZe1NRNyfuxSOBz4F3F1Ytpa/4bKDqN/2SmBso/slzEaxKpRF65QjEbGadAVnfJ4ezWn1VuT5Ne/MJ2ieIV0t/3xEPFm37v8Bts0NqdmkBli9H5G6QP+JVOacFRE35HmPAv9Iurq2O6nMqV0hK61cylfjvk660jfcSpqZwaeBYyW9qEXr+8+6e
tK5dfMfLcyrNfS2o3EdqllXAEfl+8S2Bj6e02snu08nNb62B/4LOEfS3iPYng2RG1zVcHBEjMvTwcBqYKMG+TYC/lbC9h8DJtbeRMQtuVHzr6QzJEW7FWIdl7vpQKp0TGRdE4HngScKaeNJFYn/BHpY81lXke4fK9qKBpfPI+Jx4Fzgcklj8rK1/AMu24/6bW8FrIqIaHJ5s9Gg02VRw3Ikf8fH5/mPAuNzWr2JrN217uL8WTYndSWcLel9DZb7DnAM6R6ry+q2vS1wJfBZ0pW1KcD+kt4P6cpaRNwYEasj4uG8nv1y18cyy6X3A7dFxK+aXJeZNRARS0gnVY4bLG+TvlRXT5pTN398Yd6Xctpa9bBhOBu4gHS17g7g2pzeBxARN0fEY7mcWkQ6KfSvI9ieDZEbXNV0P6lCUTs7Sr7SsgPwhxK2dw2pgrDFoDn7dzVwaIP0d5Lu7Xq2mJjvhTiVdE/U+3Pyb4ExkoqXwHeh/5vBx5DO1mwVEU+Qzg4Vb04daNl6d4xgWbPRqt1l0dXAgQ3KoncAfwGuI10Z/wt1lYW8zIGk8mwd+b6IK0j3OtT7DqkcWlRfVgEvI3V1Pi9XVvpoPKDQC5uqhVRyubQP8HalkQ8fInW1PFXS/2ly3Wa2xgnAe4FJg2UsydWk7/Ow6uUR8XxEnBARUyNiMqmceDBPDRdh7S6IVjI3uCooIu4Hrge+IGlsHkXwo6SzzdcVsm4gadPCVLwatVHdvEZng2vOI1UKLpP0akkbStqUdG9Asz5DGu3rZKWh3beUdCypi87HB1juFNKoXZvm+zYuJd0cv0W+3D2TVBlC0r9KeqWkDfKl/y8Dv8lXu2qf41OStpH096TC85zahpSGZ940v9047xcVlv2IpEmSXgLMr1t247ysWLNv/f2xUa0DZdF3SGdkv5+7xmyUuy2fDpwYESsjYiWpvPmapANynqmke0j78jrWIWkycAANGjsRcR9p9MFPNlj0t2lxvSuXPS8G/o18b5Wk1xbKpe1yrL05TiivXJpLGk121zzdmPdLo89gZgOIiKXARaQBejrhy6Qr2OdK2gEgf++/LOkf8vuNclmxAenk9Ka5a3HtkTp/l7sb75TX99lad2NJh+QyfANJ+5EGF1rY/o+5HosITx2cgGXAvg3Sp5AqEA+RushcBexUmH8i6QxFceorrLN+3ucGiWNr0o3hfyDd7/AH0o3qexTyRJ63qjB9pTD/1aTL8k/leb3APxXmT83rGFNIE6kCdGx+vy3pnq5nSGfX31XIeyxwX573EOks8w6F+ZuQLqs/BTwMfKTBvq7fL1MLcXwReDxPXySdoa4t29tg2Z5OHz+ePLVqqlBZtC3wzfwd/lMuH45qkO9IYEnO83BeZpu6uP5WKKtWkAaX2DzP76nF2WDdR5EaTbX3bwRuIN1D9RBppMHaeg4rlEsrSI2kFxeWLa1cqltPb6P95MmTp8ZTfZmXy7o/F7/7OT2Alze5znNIo58W60mP5nlTqasD1S37klxWPETqdnw36crb5oV115cVc/O8VwD3AM+S6m/15czPc/n1FOlk0axO7//1bVL+R5iZmZmZmVmLuUuUmZmZmZlZSdzgWk9I+oSkVQ2mKzodm5mtP1wWmVk3k3RHP2XY4Z2OzarLXQrNzMzMzMxKMtBoUR03fvz4mDp1ase2/8wzz7DFFiMZKb07+XOvXwb63DfddNOjEdGqh0F2vZGWSVU8xqoWU9XiAcfUjHbF4zJpjaGUR1U7XhqpeoxVjw8cY6s0G+OQyqNOj9ox0LT77rtHJ1177bUd3X6n+HOvXwb63MCNUYGyoCrTSMukKh5jVYupavFEOKZmtCsel0nDK4+qdrw0UvUYqx5fhGNslWZjHEp55Hu4zMzMzMzMSuIGl5mZmZmZWUnc4DIzMzMzMyuJG1xmZmZmZmYlqfQohTawqcf9uKWyPKZ1AAAbjUlEQVTrW3bKm1u6PjOz0c7lsHWD2x9c+f/bu/tgy
er6zuPvz0J8DApKMRIgGaxMjCRslMwiidlkdAwCphzdaArK6GCoJQ+Y6IbUBmNVtGStwuwiG/OAQWEFi4iEaJiSSZQQbrl5APEBeRAJE5yFkVkmhgczxcZkku/+cX4Xmsu9Mz33ntO35973q6qr+/zOr/t8z+nT3+5fn9/5Hc7scV91P5X2j0e4JEmSJGkgNrgkSZIkaSA2uCRJkiRpIDa4JEmSJGkgNrgkSZIkaSA2uCRJkiRpIA4LL0laFRY7hPu5x+/pdUhtSdLq4hEuSZIkSRqIDS5JkiRJGohdCvW42e42fXaf8Wr0kiRJWs08wiVJkiRJA7HBJUmSJEkDscElSZIkSQPZZ4MryWVJdiW5Y6TseUmuT3JPuz+slSfJB5NsS3JbkhNGnrO51b8nyeZhVkfSapbkvyS5M8kdST6e5BlJjk1yc8s9n0jytFb36W16W5u/dnmjlyRJK9E4R7g+Cpwyp+w84IaqWgfc0KYBTgXWtdvZwMXQNdCAdwMvA04E3j3bSJOkPiQ5CvgVYH1V/SBwEHA68H7gopavHgbOak85C3i4qr4XuKjVkyRJ6tU+G1xV9TngoTnFm4DL2+PLgdeNlF9RnZuAQ5McCbwauL6qHqqqh4HreWojTpKW6mDgmUkOBp4F7AReCVzT5s/NV7N57BpgY5JMMFZJkrQKLHZY+DVVtROgqnYmOaKVHwXcP1JvRytbqPwpkpxNd3SMNWvWMDMzs8gQl2737t3Luvx9Off4PYO87ppn9vfa07z95pr293soK2W9q+obSf4HcB/w/4DPAl8EHqmq2R16NPc8npeqak+SR4HnA98cfd0+c9I0butpi2nIeBab1/rMifsy7rqvpvdNkg50fV+Ha75/h2sv5U8trLoEuARg/fr1tWHDht6C218zMzMs5/L3pa9rZc117vF7uPD2fnaN7W/a0MvrTMK0v99DWSnr3bopbwKOBR4B/oium/Ncs7lnrLzUZ06axm09bTENGc9ic2afOXFfxs2Zq+l9k6QD3WJHKXywdRWk3e9q5TuAY0bqHQ08sJdySerLq4CvV9XfV9W/AJ8EfpSua/Psr+XR3PN4Xmrzn8tTu09LkiQtyWIbXFuA2ZEGNwPXjpS/pY1WeBLwaOt6+Bng5CSHtX+hT25lktSX+4CTkjyrnYu1EfgqcCPwhlZnbr6azWNvAP6iquY98i5JkrRY++wjkeTjwAbg8CQ76EYbvAC4OslZdD9y3tiqbwVOA7YBjwFvBaiqh5KcD9zS6r23qvwnWVJvqurmJNcAXwL2AF+m6wp4HXBVkv/Wyi5tT7kU+FiSbXRHtk6ffNSSJGml22eDq6rOWGDWxnnqFnDOAq9zGXDZfkUnSfuhqt5N96fQqHvpLkcxt+4/8cSfRZIkSYNYbJdCSZIkSdI+2OCSJEmSpIHY4JIkSZKkgdjgkiRJkqSB2OCSJElahCTHJLkxyV1J7kzy9lb+vCTXJ7mn3R/WypPkg0m2JbktyQkjr7W51b8nyeaFlinpwGODS5IkaXH2AOdW1YuBk4BzkhwHnAfcUFXrgBvaNMCpwLp2Oxu4GLoGGt0Iqy+jG1X13bONNEkHPhtckiRJi1BVO6vqS+3xPwJ3AUcBm4DLW7XLgde1x5uAK6pzE3BokiOBVwPXV9VDVfUwcD1wygRXRdKAbHBJkiQtUZK1wEuBm4E1VbUTukYZcESrdhRw/8jTdrSyhcolrQD7vPCxJEmSFpbkO4E/Bt5RVd9KsmDVecpqL+Vzl3M2XVdE1qxZw8zMzFjxrXkmnHv8nrHqjmPc5e6P3bt3D/K6fZn2+MAY+zJEjDa4JEmSFinJd9A1tq6sqk+24geTHFlVO1uXwV2tfAdwzMjTjwYeaOUb5pTPzF1WVV0CXAKwfv362rBhw9wq8/qdK6/lwtv7+8m3/U3jLXd/zMzMMO76LIdpjw+MsS9DxGiXQkmSpEVIdyjrUuCuqvrAyKwtwOxIg5uBa0fK39JGKzwJe
LR1OfwMcHKSw9pgGSe3MkkrgEe4JEmSFuflwJuB25Pc2sp+A7gAuDrJWcB9wBvbvK3AacA24DHgrQBV9VCS84FbWr33VtVDk1kFSUOzwSVJkrQIVfWXzH/+FcDGeeoXcM4Cr3UZcFl/0UmaFnYplCRJkqSBLLrBleRFSW4duX0ryTuSvCfJN0bKTxt5zjvb1dXvTvLqflZBkjpJDk1yTZKvJbkryY8keV6S65Pc0+4Pa3WT5IMtJ92W5ITljl+SJK08i25wVdXdVfWSqnoJ8MN0fZE/1WZfNDuvqrYCtCuvnw78AN3F/H4/yUFLC1+SnuS3gT+rqu8HfojuIqTnATdU1TrghjYNcCqwrt3OBi6efLiSJGml66tL4Ubg76rq/+ylzibgqqr6dlV9ne6E0RN7Wr6kVS7Jc4AfpxsxjKr656p6hC73XN6qXQ68rj3eBFxRnZuAQ9vwzZIkSb3pq8F1OvDxkem3tS46l81238GrqEsa1guBvwf+V5IvJ/lIkmcDa9qwy7T7I1p9c5IkSRrckkcpTPI04LXAO1vRxcD5dFdIPx+4EPg5Br6K+hCm/WrYfV41flSfV6Sf5u0317S/30NZQet9MHAC8MtVdXOS3+aJ7oPzmXhOmsZtPW0xDRnPYvNanzlxX8Zd99X0vknSga6PYeFPBb5UVQ8CzN4DJPkw8Ok2udDV1Z9ksVdRH8K0Xw37zPOuG+R1zz1+T29XpB/iavRDmfb3eygraL13ADuq6uY2fQ1dg+vBJEdW1c7WZXDXSP2J5qRp3NbTFtOQ8Sw2Z/aZE/dl3Jy5mt43STrQ9dGl8AxGuhPOOQfi9cAd7fEW4PQkT09yLN2J6p/vYfmSRFX9X+D+JC9qRRuBr9Llns2tbDNwbXu8BXhLG63wJODR2a6HkiRJfVnSX3ZJngX8JPDzI8W/leQldF1zts/Oq6o7k1xN9wNoD3BOVf3rUpYvSXP8MnBl6+p8L/BWuj+Wrk5yFnAf8MZWdytwGt0APo+1upIkSb1aUoOrqh4Dnj+n7M17qf8+4H1LWaYkLaSqbgXWzzNr4zx1Czhn8KC0aGsH6jYtSdIk9TVKoSRJkiRpDhtckiRJkjQQG1ySJEmSNBAbXJIkSZI0EBtckiRJkjQQG1ySJEmSNBAbXJIkSZI0EBtckiRJkjQQG1ySJEmSNBAbXJIkSZI0EBtckiRJkjQQG1ySJEmSNBAbXJIkSZI0EBtckiRJkjQQG1ySJEmSNJAlNbiSbE9ye5Jbk3yhlT0vyfVJ7mn3h7XyJPlgkm1JbktyQh8rIEmjkhyU5MtJPt2mj01yc8tJn0jytFb+9Da9rc1fu5xxS5KklamPI1yvqKqXVNX6Nn0ecENVrQNuaNMApwLr2u1s4OIeli1Jc70duGtk+v3ARS0nPQyc1crPAh6uqu8FLmr1JEmSejVEl8JNwOXt8eXA60bKr6jOTcChSY4cYPmSVqkkRwOvAT7SpgO8ErimVZmbk2Zz1TXAxlZfkiSpNwcv8fkFfDZJAX9QVZcAa6pqJ0BV7UxyRKt7FHD/yHN3tLKdS4xBkmb9T+C/Aoe06ecDj1TVnjY9m3dgJCdV1Z4kj7b63xx9wSRn0x2VZ82aNczMzCw6uN27dy/p+UOYtphG4zn3+D17rzwha545uVjGfS+m+X2TJD3ZUhtcL6+qB1qj6vokX9tL3fn+Oa6nVOrxx81STfsXyFA/APr8cTHN22+uaX+/h7JS1jvJTwG7quqLSTbMFs9TtcaY90RB90fSJQDr16+vDRs2zK0ytpmZGZby/CFMW0yj8Zx53nXLG0xz7vF7uPD2pX5djmf7mzaMVW+a3zdJ0pMt6Rukqh5o97uSfAo4EXgwyZHt6NaRwK5WfQdwzMjTjwYemOc1e/txs1TT/gUy1I+RPn9cjPvjYRpM+/s9lBW03i8HXpvkNOAZwHPojngdmuTgdpRrNO/M5qQdSQ4Gngs8NPmwJUnSSrboc7iSPDvJIbOPgZOBO
4AtwOZWbTNwbXu8BXhLG63wJODR2a6HkrRUVfXOqjq6qtYCpwN/UVVvAm4E3tCqzc1Js7nqDa3+U45wSZIkLcVSBs1YA/xlkq8Anweuq6o/Ay4AfjLJPcBPtmmArcC9wDbgw8AvLWHZkjSuXwd+Nck2unO0Lm3llwLPb+W/yhMjqkrSWJJclmRXkjtGyvb78jhJNrf69yTZPN+yJB24Ft1vrKruBX5onvJ/ADbOU17AOYtd3kqwdkrOR5BWuqqaAWba43vpujvPrfNPwBsnGpikleajwO8CV4yUzV4e54Ik57XpX+fJl8d5Gd3lcV6W5HnAu4H1dOeRfjHJlqp6eGJrIWlQQwwLL0mStOJV1ed46rmf+3t5nFcD11fVQ62RdT1wyvDRS5qUyQy7JEmStDrs7+VxFip/isWO5Nz3pQ2GGNl22kfMnfb4wBj7MkSMNrgkSZKGt9ClKMa6RAUsfiTn37ny2l4vbTDECMTTPmLutMcHxtiXIWK0S6EkSVJ/HmxdBRnz8jhjXTZH0oHLBpckSVJ/9vfyOJ8BTk5yWBvR8ORWJmmFsEuhJEnSIiT5OLABODzJDrrRBi8Ark5yFnAfT4yGuhU4je7yOI8BbwWoqoeSnA/c0uq9t6q8CLu0gtjgkiRJWoSqOmOBWft1eZyqugy4rMfQJE0RuxRKkiRJ0kA8wiVJ0pRYe951Y9U79/g9nDlm3e0XvGYpIUmSlsgjXJIkSZI0EBtckiRJkjQQG1ySJEmSNBAbXJIkSZI0EBtckiRJkjQQRymUJPVi3BH29mZ/Rt+TJOlAsOgjXEmOSXJjkruS3Jnk7a38PUm+keTWdjtt5DnvTLItyd1JXt3HCkgS7DUnPS/J9UnuafeHtfIk+WDLSbclOWF510CSJK1ES+lSuAc4t6peDJwEnJPkuDbvoqp6SbttBWjzTgd+ADgF+P0kBy1h+ZI0aqGcdB5wQ1WtA25o0wCnAuva7Wzg4smHLEmSVrpFN7iqamdVfak9/kfgLuCovTxlE3BVVX27qr4ObANOXOzyJWnUXnLSJuDyVu1y4HXt8SbgiurcBBya5MgJhy1Jkla4Xs7hSrIWeClwM/By4G1J3gJ8ge4f54fpfvjcNPK0HczTQEtyNt2/zaxZs4aZmZk+QlyU3bt397r8c4/f09trDWnNM/uLdTnfv/3V9/t9oFiJ6z0nJ62pqp3QNcqSHNGqHQXcP/K02Zy0c85r9ZaTpnFb9xlTH3mjz/zTlwM9pknsc9O4b0vStFhygyvJdwJ/DLyjqr6V5GLgfKDa/YXAzwGZ5+n1lIKqS4BLANavX18bNmxYaoiLNjMzQ5/LP1BOBD/3+D1ceHs/46lsf9OGXl5nEvp+vw8UK22958lJC1adp2zQnDSN27rPmPrIcX3mn74c6DFNIg9P474tSdNiScPCJ/kOuh82V1bVJwGq6sGq+teq+jfgwzzRbXAHcMzI048GHljK8iVp1Hw5CXhwtqtgu9/Vys1JkiRpcIv+yy7d38aXAndV1QdGyo+c7b4DvB64oz3eAvxhkg8A30V3ovrnF7t8HRj6GCZ61PYLXtPr62nlWCgn0eWezcAF7f7akfK3JbkKeBnw6EjukiRJ6sVS+ki8HHgzcHuSW1vZbwBnJHkJXdec7cDPA1TVnUmuBr5KN5rYOVX1r0tYviSNWignXQBcneQs4D7gjW3eVuA0ugF8HgPeOtlwJUnSarDoBldV/SXznwOxdS/PeR/wvsUuU5IWspecBLBxnvoFnDNoUJIkadVb0jlckiRJkqSF2eCSJEmSpIHY4JIkSZKkgdjgkiRJkqSB2OCSJEmSpIHY4JIkSZKkgdjgkiRJkqSBLOXCx5IkSZK0bNaed12vr/fRU57d6+uBR7gkSZIkaTA2uCRJkiRpIHYplCRpBeu7u832C17T6+tJ0krnES5JkiRJGogNLkmSJEkaiA0uSZIkSRrIxM/hSnIK8NvAQcBHquqCSccwjrXnXce5x+/hz
J77vmtp+j4XATwfYTU7UPLREGY/S+Y5aXqs5pwkrWQTPcKV5CDg94BTgeOAM5IcN8kYJAnMR5KmizlJWrkm3aXwRGBbVd1bVf8MXAVsmnAMkgTmI0nTxZwkrVCT7lJ4FHD/yPQO4GV9vPAQXc20OqzGrlV2owSWIR+tpn1MK9d8+/dS921zEjBgTpK0vCbd4Mo8ZfWkCsnZwNltcneSuwePagG/AocD31yu5S8X13vly/ufNLm39f6ewYNZPvvMR9BvTprGfWzaYpq2eMCYxrHUeObkpL1Z1TlpCfmo1/1lP96v/TFV+/Q8pj0+MMZevOL9Y8c4dj6adINrB3DMyPTRwAOjFarqEuCSSQa1kCRfqKr1yx3HpLneq8tqXW/GyEfQb06axm09bTFNWzxgTOOYtngOUIP9RjoQ3p9pj3Ha4wNj7MsQMU76HK5bgHVJjk3yNOB0YMuEY5AkMB9Jmi7mJGmFmugRrqrak+RtwGfohjy9rKrunGQMkgTmI0nTxZwkrVwTvw5XVW0Ftk56uYs0FV0bl4Hrvbqs1vVejnw0jdt62mKatnjAmMYxbfEckAbMSQfC+zPtMU57fGCMfek9xlQ95RxxSZIkSVIPJn0OlyRJkiStGqu+wZXklCR3J9mW5Lx55n9PkhuS3JZkJsnRyxFn35JclmRXkjsWmJ8kH2zb5bYkJ0w6xiGMsd7fn+Rvknw7ya9NOr6hjLHeb2rv821J/jrJD006xpUiyRuT3Jnk35KsnzPvne0zdXeSVy/w/GOT3JzkniSfaCfP9xnfJ5Lc2m7bk9y6QL3tSW5v9b7QZwxzlvOeJN8Yiem0BertNVf3HNN/T/K19nn4VJJDF6g36DYa4/vp6e393Nb2mbV9xzBnecckuTHJXW0ff/s8dTYkeXTk/fzNIWPSE6Ztf1lkjL+a5Kvts3dDkolfBmDcXJPkDUlqbp6fhHFiTPIzbVvemeQPpy3GJN/d8smX2/s9b+4fML7J/g6uqlV7ozsp9e+AFwJPA74CHDenzh8Bm9vjVwIfW+64e1r3HwdOAO5YYP5pwJ/SXRfkJODm5Y55Qut9BPAfgPcBv7bc8U5wvX8UOKw9PnWlvN/LtK1fDLwImAHWj5Qf13LM04FjW+45aJ7nXw2c3h5/CPjFAWO9EPjNBeZtBw6fwPZ6z74+a+Pk6p5jOhk4uD1+P/D+SW+jMb+ffgn4UHt8OvCJgd+rI4ET2uNDgL+dJ6YNwKeH3m+8Tf/+ssgYXwE8qz3+xWmMsdU7BPgccNNonp+WGIF1wJdHvtePmMIYL5n9fmvfj9snHONEfwev9iNcJwLbqureqvpn4Cpg05w6xwE3tMc3zjP/gFRVnwMe2kuVTcAV1bkJODTJkZOJbjj7Wu+q2lVVtwD/MrmohjfGev91VT3cJm+iu/6LFqGq7qqq+S5Gugm4qqq+XVVfB7bR5aDHJQndHzvXtKLLgdcNEWdb1s8AHx/i9Xs2Tq7uTVV9tqr2tMnl+jyMs86b6PYR6PaZje19HURV7ayqL7XH/wjcBRw11PK0X6Zuf1lMjFV1Y1U91iaX47M3bq45H/gt4J8mGVwzToz/Gfi92e/1qto1hTEW8Jz2+LnMcx3MIU36d/Bqb3AdBdw/Mr2Dp355fAX46fb49cAhSZ4/gdiW2zjbRivTWXT/6qhf43ymng88MvJjf8jP3X8EHqyqexaYX8Bnk3wxydkDxTDrba3LxmVJDptn/nLmo59j4c/DkNtonHV+vE7bZx6l24cG17qjvRS4eZ7ZP5LkK0n+NMkPTCIeTff+Mnf5zb4+x8vxXbTPGJO8FDimqj49ycBGjLMdvw/4viR/leSmJKdMLLrOODG+B/jZJDvoRub85cmENrZev3cmPiz8lJnvn525wzb+GvC7Sc6kO3z8DWDP3CetQONsG60wSV5B9yX3Y8sdyzRL8ufAC+aZ9a6qunahp81TNvcz1cvnbsz4zmDvR7deXlUPJDkCuD7J19o/gvttb
/EAF9P9W1zt/kK6Rs6TXmKe5y4pH42zjZK8iy7fX7nAy/S2jeYLcZ6yQfaX/ZXkO4E/Bt5RVd+aM/tLwPdU1e52Tsaf0HVv0rCmdn9ZzPKT/CywHviJQSOaZ9HzlD0eY5J/B1wEnDmpgOYxznY8mO5zt4HuKOH/TvKDVfXIwLHNGifGM4CPVtWFSX4E+FiL8d+GD28svX5eVnuDawdwzMj00cw5pFlVDwD/CR7/kvnpqnp0YhEun31uG60sSf498BHg1Kr6h+WOZ5pV1asW8bRxPlPfpOu2cHD7B3pRn7t9xZfkYLq89sN7eY0H2v2uJJ+i6yKyqMbEuNsryYeB+f417j0fjbGNNgM/BWys1qF/ntfobRvNY5x1nq2zo72nz2XvXWSWLMl30DW2rqyqT86dP9oAq6qtSX4/yeFV9c0h49J07i8LLH/WvJ/jJK+i+zPmJ6rq2xOKbda+YjwE+EFgpvXGfAGwJclrq2qwwYX2M8bZOjdV1b8AX09yN10D7JbJhDhWjGcBpwBU1d8keQZwODDp7o8L6fV7Z7V3KbwFWJduVLCn0Z1EumW0QpLD2z8aAO8ELptwjMtlC/CWNkrLScCjVbVzuYPSMJJ8N/BJ4M1V9bfLHc8KtQU4vY0Udizdl9/nRyu0H/Y3Am9oRZuBhY6YLcWrgK9V1Y75ZiZ5dpJDZh/TDSIx70hOSzWnT/zrF1jOPnN1zzGdAvw68NqR80nm1hl6G42zzlvo9hHo9pm/WKhx2Id2vs+lwF1V9YEF6rxg9rygJCfS/c7wD5zhTd3+spgYW3e9P6D77C3HD++9xlhVj1bV4VW1tqrW0p1nNsnG1j5jbP6EbgASkhxO18Xw3imL8T5gY4vxxcAzgL+fYIz70u/v4KWMuLESbnSjkPwt3Wgq72pl76X7AEGXlO5pdT4CPH25Y+5pvT8O7KQbHGIH3T8NvwD8Qpsf4PfadrmdCY/Cs4zr/YJW/i3gkfb4Ocsd9wTW+yPAw8Ct7faF5Y75QL3RNRp2AN8GHgQ+MzLvXe0zdTfdkcTZ8q3Ad7XHL6RriG2jGyW195wDfHT2vR8p+y5g60gMX2m3O2dz40Db62Mtx9xG9wV35Nx42vRTcvWAMW2j67s/+3n40NyYJrGNxvh+ekbbR7a1feaFA2+XH6PrUnPbyLY5bU4ueVvbHl+h+zH6o0PG5G1695dFxvjnLW/O7l9bpi3GOXVnWIbfR2NsxwAfAL7a8uvpUxjjccBftVxxK3DyhOOb6O/gtBeVJEmSJPVstXcplCRJkqTB2OCSJEmSpIHY4JIkSZKkgdjgkiRJkqSB2OCSJEmSpIHY4JIkSZKkgdjgkiRJkqSB2OCSJEmSpIH8f68pkfHIk2bRAAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}]},{"metadata":{},"cell_type":"markdown","source":"## Standardize data\n#### Standardization is a useful technique to transform attributes with a Gaussian distribution and differing means and standard deviations to a standard Gaussian distribution with a mean of 0 and a standard deviation of 1"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.preprocessing import StandardScaler\narray = data.values\n#separate array into input and output components\nX = array[:,0:11]\nY = array[:,11]\nscaler = StandardScaler().fit(X)\nrescaledX = 
scaler.transform(X)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledX[0:5,:])","execution_count":17,"outputs":[{"output_type":"stream","text":"[[ 7.69712416e-01 -1.12341031e+00 -1.90047029e-01 1.37605977e-01\n -6.06718766e-01 -7.20629784e-01 -3.11684110e-01 -1.33070267e+00\n -2.25681314e-02 -3.60971652e-01 -9.25122883e-01]\n [ 5.07884366e-01 -4.10857567e-01 -3.76275096e-01 -2.43225309e-01\n -1.18128598e+00 -9.24503172e-01 3.20837658e+00 -1.30322672e+00\n -5.92370366e-01 9.02050407e-01 1.03699430e+00]\n [ 9.00626442e-01 -4.10857567e-01 -9.16336490e-01 -8.65106417e-03\n -1.05360437e+00 -4.63244125e-02 -3.11684110e-01 -1.30383154e+00\n -4.59030969e-01 -1.40916462e+00 2.31808537e+00]\n [ 7.69712416e-01 -5.74065761e-01 -7.11485617e-01 -8.70124978e-01\n 1.59370848e-01 6.27395116e-01 -3.11684110e-01 -1.30201709e+00\n -7.01486075e-01 -2.30020073e+00 6.98862456e-01]\n [ 1.42428254e+00 2.04070778e-03 -3.66963693e-01 -1.04957427e+00\n -4.79037163e-01 2.00315519e-01 -3.11684110e-01 -1.30184429e+00\n -7.71259861e-02 -1.97723618e+00 1.44656245e+00]]\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"array = test.values\nscaler = StandardScaler().fit(array)\nrescaledt = scaler.transform(array)\n# summarize transformed data\n#set_printoptions(precision=3)\nprint(rescaledt[0:5,:])","execution_count":18,"outputs":[{"output_type":"stream","text":"[[ 0.45558229 -0.67113515 -0.90455611 -0.037747 -0.22296129 -1.52565769\n -0.30367585 -1.33755567 0.07989345 2.01150387 -0.07427413]\n [ 0.45558229 -0.57637626 -0.73276977 -1.41794241 -1.95412296 1.13648289\n -0.30367585 -1.34301578 -1.99658599 -1.46614536 2.76572023]\n [-0.01747439 -1.14544186 -0.65139729 -3.34261399 1.38454597 -2.04630698\n -0.30367585 -1.35675411 -3.01305074 0.09826365 -0.3495563 ]\n [ 0.57384646 -1.14544186 -0.94072165 -0.57325036 -0.3775293 -1.89531242\n -0.30367585 -1.19418387 -0.51880608 -0.67403953 -0.5147256 ]\n [-1.43664442 1.61934109 0.79522452 -0.26667874 
-0.5630109 0.24805848\n -0.30367585 -1.12831034 -0.88045023 -0.49124588 0.93050578]]\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Feature selection\n\n"},{"metadata":{},"cell_type":"markdown","source":"#### It's the process of selecting a subset of relevant features for use in model construction."},{"metadata":{},"cell_type":"markdown","source":"### Chose Recursive Feature Elimination\n#### This is an automatic feature selection technique.\n#### Used logistic regression as the estimator: it is a good baseline, as it is fast to train and predict and scales well.\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.feature_selection import RFE\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n# feature extraction: keep the 8 best-ranked of the 11 input columns\nmodel = LogisticRegression()\n# n_features_to_select must be passed by keyword (positional use is rejected by scikit-learn >= 1.0)\nrfe = RFE(model, n_features_to_select=8)\nfit = rfe.fit(X,Y)\nprint(\"Num Features:\", fit.n_features_)\nprint(\"Selected Features:\", fit.support_)\nprint(\"Feature Ranking:\", fit.ranking_)","execution_count":19,"outputs":[{"output_type":"stream","text":"Num Features: 8\nSelected Features: [ True False True False True True True True False True True]\nFeature Ranking: [1 3 1 2 1 1 1 1 4 1 1]\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"X[:,fit.support_]","execution_count":20,"outputs":[{"output_type":"execute_result","execution_count":20,"data":{"text/plain":"array([[ 5. , 0.951, 0.975, ..., 0.282, 5.661, 1.041],\n [ 4. , 0.931, 0.957, ..., 0.6 , 6.537, 1.453],\n [ 5.5 , 0.873, 0.961, ..., 0.593, 4.934, 1.722],\n ...,\n [-1.5 , 1.091, 0.991, ..., 16.918, 5.889, 1.131],\n [ 2. , 0.849, 1.017, ..., 17.131, 6.055, 1.27 ],\n [-1.
, 1.066, 0.998, ..., 17.151, 5.853, 1.136]])"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop=data.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop","execution_count":21,"outputs":[{"output_type":"execute_result","execution_count":21,"data":{"text/plain":" FULL_Charge FULL_AURR980107 FULL_GEOR030101 FULL_OOBM850104 \\\n0 5.0 0.951 0.975 -3.663 \n1 4.0 0.931 0.957 -4.011 \n2 5.5 0.873 0.961 -2.512 \n3 5.0 0.895 0.999 -1.362 \n4 7.5 0.932 0.979 -2.091 \n... ... ... ... ... \n3033 1.0 0.945 1.006 -2.151 \n3034 -6.5 1.133 1.015 -1.675 \n3035 -1.5 1.091 0.991 -0.918 \n3036 2.0 0.849 1.017 -2.722 \n3037 -1.0 1.066 0.998 -2.080 \n\n NT_EFC195 AS_MeanAmphiMoment AS_FUKS010112 CT_RACS820104 CLASS \n0 0 0.282 5.661 1.041 1 \n1 1 0.600 6.537 1.453 1 \n2 0 0.593 4.934 1.722 1 \n3 0 0.614 4.316 1.382 1 \n4 0 0.616 4.540 1.539 1 \n... ... ... ... ... ... \n3033 0 16.706 5.598 1.144 0 \n3034 0 16.897 6.194 1.639 0 \n3035 0 16.918 5.889 1.131 0 \n3036 0 17.131 6.055 1.270 0 \n3037 0 17.151 5.853 1.136 0 \n\n[3038 rows x 9 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AURR980107FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_FUKS010112CT_RACS820104CLASS
05.00.9510.975-3.66300.2825.6611.0411
14.00.9310.957-4.01110.6006.5371.4531
25.50.8730.961-2.51200.5934.9341.7221
35.00.8950.999-1.36200.6144.3161.3821
47.50.9320.979-2.09100.6164.5401.5391
..............................
30331.00.9451.006-2.151016.7065.5981.1440
3034-6.51.1331.015-1.675016.8976.1941.6390
3035-1.51.0910.991-0.918016.9185.8891.1310
30362.00.8491.017-2.722017.1316.0551.2700
3037-1.01.0660.998-2.080017.1515.8531.1360
\n

3038 rows × 9 columns

\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"drop_test = test.drop(['FULL_AcidicMolPerc', 'FULL_DAYM780201', 'AS_DAYM780201'],axis=1)\ndrop_test","execution_count":22,"outputs":[{"output_type":"execute_result","execution_count":22,"data":{"text/plain":" FULL_Charge FULL_AURR980107 FULL_GEOR030101 FULL_OOBM850104 \\\n0 4.0 0.873 0.987 -4.833 \n1 4.0 0.892 0.931 -0.584 \n2 2.0 0.901 1.039 -5.664 \n3 4.5 0.869 0.982 -5.423 \n4 -4.0 1.061 0.976 -2.002 \n.. ... ... ... ... \n753 -1.5 1.100 0.991 -1.987 \n754 -1.0 1.085 1.027 -0.745 \n755 -1.0 1.108 1.033 -1.789 \n756 -1.0 0.955 1.023 1.141 \n757 -7.0 1.078 1.009 -0.066 \n\n NT_EFC195 AS_MeanAmphiMoment AS_FUKS010112 CT_RACS820104 \n0 0 0.382 7.225 1.234 \n1 0 0.320 4.942 1.853 \n2 0 0.164 5.969 1.174 \n3 0 2.010 5.462 1.138 \n4 0 2.758 5.582 1.453 \n.. ... ... ... ... \n753 0 15.185 7.053 1.325 \n754 0 16.550 6.729 1.132 \n755 0 16.112 6.036 1.219 \n756 0 20.630 5.669 1.111 \n757 0 17.168 6.688 1.305 \n\n[758 rows x 8 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FULL_ChargeFULL_AURR980107FULL_GEOR030101FULL_OOBM850104NT_EFC195AS_MeanAmphiMomentAS_FUKS010112CT_RACS820104
04.00.8730.987-4.83300.3827.2251.234
14.00.8920.931-0.58400.3204.9421.853
22.00.9011.039-5.66400.1645.9691.174
34.50.8690.982-5.42302.0105.4621.138
4-4.01.0610.976-2.00202.7585.5821.453
...........................
753-1.51.1000.991-1.987015.1857.0531.325
754-1.01.0851.027-0.745016.5506.7291.132
755-1.01.1081.033-1.789016.1126.0361.219
756-1.00.9551.0231.141020.6305.6691.111
757-7.01.0781.009-0.066017.1686.6881.305
\n

758 rows × 8 columns

\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":"1. #### Decided to first use all the features\n"},{"metadata":{},"cell_type":"markdown","source":"# Evaluate the Performance of Machine Learning Algorithms with Resampling\n"},{"metadata":{},"cell_type":"markdown","source":"#### The best way to evaluate the performance of an algorithm would be to make predictions for new data to which you already know the answers."},{"metadata":{},"cell_type":"markdown","source":"## Split into Train and Test Sets"},{"metadata":{},"cell_type":"markdown","source":"#### This algorithm evaluation technique is very fast. It is ideal for large datasets where there is strong evidence that both splits of the data are representative of the underlying problem. Because of the speed, it is useful to use this approach when the algorithm you are investigating is slow to train.\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\ntest_size = 0.30\nseed = 7\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,\nrandom_state=seed)\nmodel = LogisticRegression()\nmodel.fit(X_train, Y_train)\nresult = model.score(X_test, Y_test)\nprint(\"Accuracy: \", (result*100.0))\n\n\n# refit on the full training set before predicting Test.csv\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\n# MCC of the refitted model on its own training rows (not a held-out estimate)\nmcc = matthews_corrcoef(Y, model.predict(X))\nprint('MCC:',mcc)\n \nreport = pd.DataFrame(output)\nreport.columns = ['CLASS']\nreport.index.name = \"Index\"\nreport['CLASS']=report['CLASS'].map({0.0:False, 1.0:True})\nreport.to_csv(\"report.csv\")\n\nprint(report['CLASS'].unique())\n# count by label: positional .size()[0]/[1] raises or mislabels when only one class is predicted\nprint('False: ',report['CLASS'].eq(False).sum())\nprint('True: ',report['CLASS'].eq(True).sum())\n","execution_count":23,"outputs":[{"output_type":"stream","text":"Accuracy: 91.55701754385966\nMCC: 0.8342865299822478\n[False
True]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## K-fold Cross Validation"},{"metadata":{},"cell_type":"markdown","source":"#### It is more accurate because the algorithm is trained and evaluated multiple times on different data."},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10 #number of folds to use\nseed = 7 #reproducibility\n\n# shuffle=True: the rows are ordered by CLASS, so unshuffled folds are close to single-class;\n# random_state only takes effect (and is only accepted by newer scikit-learn) when shuffling\nkfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\n# refit on the full training set before predicting Test.csv\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\n# MCC of the refitted model on its own training rows (not a held-out estimate)\nmcc = matthews_corrcoef(Y, model.predict(X))\nprint('MCC:',mcc)\n \nreport_kf = pd.DataFrame(output)\nreport_kf.columns = ['CLASS']\nreport_kf.index.name = \"Index\"\nreport_kf['CLASS']=report_kf['CLASS'].map({0.0:False, 1.0:True})\nreport_kf.to_csv(\"report_kf.csv\")\n\nprint(report_kf['CLASS'].unique())\n# count by label: positional .size()[0]/[1] raises or mislabels when only one class is predicted\nprint('False: ',report_kf['CLASS'].eq(False).sum())\nprint('True: ',report_kf['CLASS'].eq(True).sum())\n\n","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Leave One Out Cross Validation\n#### It's a special case of cross validation where the number of folds equals the number of instances in the data set; thus the learning algorithm is applied once for each instance, using all other instances as a training set and using the selected instance as a single-item test set."},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import LeaveOneOut\nfrom sklearn.model_selection import cross_val_score\n\nnum_folds = 10\nloocv
= LeaveOneOut()\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=loocv)\nprint(\"Accuracy:\", (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_l = pd.DataFrame(output)\nreport_l.columns = ['CLASS']\nreport_l.index.name = \"Index\"\nreport_l['CLASS']=report_l['CLASS'].map({0.0:False, 1.0:True})\nreport_l.to_csv(\"report_l.csv\")\n\nprint(report_l['CLASS'].unique())\nprint('False: ',report_l.groupby('CLASS').size()[0].sum())\nprint('True: ',report_l.groupby('CLASS').size()[1].sum())\n","execution_count":25,"outputs":[{"output_type":"stream","text":"Accuracy: (91.4417379855168, 27.974673416141517)\nMCC: 0.8342865299822478\n[False True]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Repeated Random Test-Train Splits"},{"metadata":{},"cell_type":"markdown","source":"#### Creates a random split of the data like the train/test split , but repeats the process of splitting and evaluation of the algorithm multiple times, like cross validation. Repeated random splits can be useful intermediates when trying to balance variance in the estimated performance, model training speed and dataset size\n#### In this I prefered using Repeated Random Test_Train Splits because when you look at the dataset the zeros are one side and the ones on the otherside in the 'class' column. 
So I would prefer to first shuffle the data and then split it to reduce on the bias"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import cross_val_score\n\nn_splits = 10\ntest_size = 0.30\nseed = 7\nkfold = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=seed)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(\"Accuracy: \" , (results.mean()*100.0, results.std()*100.0))\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rrt = pd.DataFrame(output)\nreport_rrt.columns = ['CLASS']\nreport_rrt.index.name = \"Index\"\nreport_rrt['CLASS']=report_rrt['CLASS'].map({0.0:False, 1.0:True})\nreport_rrt.to_csv(\"report_rrt.csv\")\n\nprint(report_rrt['CLASS'].unique())\nprint('False: ',report_rrt.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rrt.groupby('CLASS').size()[1].sum())\n\n","execution_count":26,"outputs":[{"output_type":"stream","text":"Accuracy: (91.30482456140349, 0.5803122202806698)\nMCC: 0.8342865299822478\n[False True]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"# Machine Learning Algorithm Performance Metrics"},{"metadata":{},"cell_type":"markdown","source":"## Algorithms Overview\n### linear machine learning algorithms:\n\n Logistic Regression.\n Linear Discriminant Analysis.\n### onlinear machine learning algorithms\n\n k-Nearest Neighbors.\n Naive Bayes.\n Classication and Regression Trees.\n Support Vector Machines.\n"},{"metadata":{},"cell_type":"markdown","source":"## Linear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"## Logistic Regression\n#### It's the appropriate regression analysis to conduct when the dependent variable is binary. 
So I tried to use it on my data since it is binary and has no outliers"},{"metadata":{},"cell_type":"markdown","source":"### Using standardized data"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic regression on standardized data\nnum_folds = 10\n# shuffle=True: random_state only takes effect when shuffling, and the rows are ordered by class\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = LogisticRegression()\n# cross-validate on rescaledX, the same features the model is fitted on below\nresults = cross_val_score(model, rescaledX, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(rescaledX,Y)\noutput = model.predict(rescaledt)\n\nfrom sklearn.metrics import matthews_corrcoef\n# the model was fitted on rescaledX, so it must be scored on rescaledX as well (not raw X)\nmcc = matthews_corrcoef(model.predict(rescaledX),Y)\nprint('MCC:',mcc)\n \nreport_scaled = pd.DataFrame(output)\nreport_scaled.columns = ['CLASS']\nreport_scaled.index.name = \"Index\"\nreport_scaled['CLASS']=report_scaled['CLASS'].map({0.0:False, 1.0:True})\nreport_scaled.to_csv(\"report_scaled.csv\")\n\nprint(report_scaled['CLASS'].unique())\nprint('False: ',report_scaled.groupby('CLASS').size()[0].sum())\nprint('True: ',report_scaled.groupby('CLASS').size()[1].sum())\n","execution_count":27,"outputs":[{"output_type":"stream","text":"0.835359128018065\nMCC: 0.3581965183368267\n[False True]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Logistic Regression Classification on untouched data\n\nnum_folds = 10\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = LogisticRegression()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nmy_report = pd.DataFrame(output)\nmy_report.columns = ['CLASS']\nmy_report.index.name = \"Index\"\nmy_report['CLASS']=my_report['CLASS'].map({0.0:False, 1.0:True})\nmy_report.to_csv(\"report_XGB.csv\")\n\nprint(my_report['CLASS'].unique())\nprint('False: ',my_report.groupby('CLASS').size()[0].sum())\nprint('True: 
',my_report.groupby('CLASS').size()[1].sum())","execution_count":28,"outputs":[{"output_type":"stream","text":"0.835359128018065\nMCC: 0.8342865299822478\n[False True]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Linear Discriminant Analysis\n#### Linear Discriminant Analysis (LDA) is a very common technique used for supervised classification problems. It reduces the dimensions by removing the redundant and dependent features by transforming the features from a higher-dimensional space to a space with lower dimensions.\n\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n\nnum_folds = 10\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = LinearDiscriminantAnalysis()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nlda_report = pd.DataFrame(output)\nlda_report.columns = ['CLASS']\nlda_report.index.name = \"Index\"\nlda_report['CLASS']=lda_report['CLASS'].map({0.0:False, 1.0:True})\nlda_report.to_csv(\"ldareport.csv\")\n\nprint(lda_report['CLASS'].unique())\nprint('False: ',lda_report.groupby('CLASS').size()[0].sum())\nprint('True: ',lda_report.groupby('CLASS').size()[1].sum())\n\n","execution_count":29,"outputs":[{"output_type":"stream","text":"0.8535044293903076\nMCC: 0.8377162908048379\n[False True]\nFalse: 401\nTrue: 357\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Nonlinear Machine Learning Algorithms"},{"metadata":{},"cell_type":"markdown","source":"### k-Nearest Neighbors\n#### Can solve both classification and regression problems. 
However, it is more widely used in classification problems so I decided to try it and it gave me a very low score"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.neighbors import KNeighborsClassifier\nnum_folds = 10\n# shuffle=True: random_state only takes effect when shuffling, and the rows are ordered by class\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = KNeighborsClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_k = pd.DataFrame(output)\nreport_k.columns = ['CLASS']\nreport_k.index.name = \"Index\"\nreport_k['CLASS']=report_k['CLASS'].map({0.0:False, 1.0:True})\nreport_k.to_csv(\"report_k.csv\")\n\n\nprint(report_k['CLASS'].unique())\nprint('False: ',report_k.groupby('CLASS').size()[0].sum())\nprint('True: ',report_k.groupby('CLASS').size()[1].sum())","execution_count":30,"outputs":[{"output_type":"stream","text":"0.8027933385443807\nMCC: 0.8690586462107053\n[False True]\nFalse: 402\nTrue: 356\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"### Naive Bayes"},{"metadata":{},"cell_type":"markdown","source":"### Tried using Standardised data on Naive Bayes\n\n### When I predicted with Naive Bayes on standardised data it gave me a score of 0.98235, after feature selection it gave 0.90 and on unstandardised data it gave a score of 0.9959"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on standardised data\nfrom sklearn.naive_bayes import GaussianNB\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = GaussianNB()\n# cross-validate on rescaledX, the same features the model is fitted on below\nresults = cross_val_score(model, rescaledX, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(rescaledX,Y)\noutput = model.predict(rescaledt)\n\nfrom sklearn.metrics import matthews_corrcoef\n# the model was fitted on rescaledX, so it must be scored on rescaledX as well (not raw X)\nmcc = matthews_corrcoef(model.predict(rescaledX),Y)\nprint('MCC:',mcc)\n \nreport_rebayes = pd.DataFrame(output)\nreport_rebayes.columns = ['CLASS']\nreport_rebayes.index.name = 
\"Index\"\nreport_rebayes['CLASS']=report_rebayes['CLASS'].map({0.0:False, 1.0:True})\nreport_rebayes.to_csv(\"report_rebayes.csv\")\n\n\nprint(report_rebayes['CLASS'].unique())\nprint('False: ',report_rebayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rebayes.groupby('CLASS').size()[1].sum())","execution_count":31,"outputs":[{"output_type":"stream","text":"0.880815746048289\nMCC: 0.0\n[ True False]\nFalse: 370\nTrue: 388\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Naive Bayes on selected features"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Naive Bayes on selected features\n\narray = data.values\nX = array[:,0:11]\nY = array[:,11]\n\nselectedX = X[:,fit.support_]\n\narray2 =test.values\nselectedT = array2[:,fit.support_]\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, selectedX, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(selectedX,Y)\noutput = model.predict(selectedT)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(selectedX),Y)\nprint('MCC:',mcc)\n \nreport_sel = pd.DataFrame(output)\nreport_sel.columns = ['CLASS']\nreport_sel.index.name = \"Index\"\nreport_sel['CLASS']=report_sel['CLASS'].map({0.0:False, 1.0:True})\nreport_sel.to_csv(\"report_sel.csv\")\n\n\nprint(report_sel['CLASS'].unique())\nprint('False: ',report_sel.groupby('CLASS').size()[0].sum())\nprint('True: ',report_sel.groupby('CLASS').size()[1].sum())\n","execution_count":32,"outputs":[{"output_type":"stream","text":"0.8765426871634532\nMCC: 0.8397893337148853\n[ True False]\nFalse: 384\nTrue: 374\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.naive_bayes import GaussianNB\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = GaussianNB()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics 
import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bayes = pd.DataFrame(output)\nreport_bayes.columns = ['CLASS']\nreport_bayes.index.name = \"Index\"\nreport_bayes['CLASS']=report_bayes['CLASS'].map({0.0:False, 1.0:True})\nreport_bayes.to_csv(\"report_bayes.csv\")\n\n\nprint(report_bayes['CLASS'].unique())\nprint('False: ',report_bayes.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bayes.groupby('CLASS').size()[1].sum())","execution_count":33,"outputs":[{"output_type":"stream","text":"0.880815746048289\nMCC: 0.8407203694376205\n[ True False]\nFalse: 370\nTrue: 388\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"### Classification and Regression Trees"},{"metadata":{},"cell_type":"markdown","source":"#### Used for classification or regression predictive modeling problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.tree import DecisionTreeClassifier\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = DecisionTreeClassifier()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_tree = pd.DataFrame(output)\nreport_tree.columns = ['CLASS']\nreport_tree.index.name = \"Index\"\nreport_tree['CLASS']=report_tree['CLASS'].map({0.0:False, 1.0:True})\nreport_tree.to_csv(\"report_tree.csv\")\n\n\nprint(report_tree['CLASS'].unique())\nprint('False: ',report_tree.groupby('CLASS').size()[0].sum())\nprint('True: ',report_tree.groupby('CLASS').size()[1].sum())","execution_count":34,"outputs":[{"output_type":"stream","text":"0.7362232933819698\nMCC: 1.0\n[ True False]\nFalse: 385\nTrue: 373\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"### Support Vector Machines "},{"metadata":{},"cell_type":"markdown","source":"#### A support vector machine (SVM) is 
a supervised machine learning model that uses classification algorithms for two-group classification problems"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.svm import SVC\n\n# shuffle=True: random_state only takes effect when shuffling, and the rows are ordered by class\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = SVC()\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_svm = pd.DataFrame(output)\nreport_svm.columns = ['CLASS']\nreport_svm.index.name = \"Index\"\nreport_svm['CLASS']=report_svm['CLASS'].map({0.0:False, 1.0:True})\nreport_svm.to_csv(\"report_svm.csv\")\n\n\nprint(report_svm['CLASS'].unique())\nprint('False: ',report_svm.groupby('CLASS').size()[0].sum())\nprint('True: ',report_svm.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":35,"outputs":[{"output_type":"stream","text":"0.8350280093798853\nMCC: 0.8187103751493263\n[False True]\nFalse: 397\nTrue: 361\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"# Combine Models Into Ensemble Predictions\n\nThe three most popular methods for combining the predictions from different models are:\n \n Bagging\n Boosting\n Voting"},{"metadata":{},"cell_type":"markdown","source":"## Boosting Algorithms\n#### These seek to improve the prediction power by training a sequence of weak models, each compensating for the weaknesses of its predecessors."},{"metadata":{},"cell_type":"markdown","source":"## AdaBoost\n#### This is specifically designed for classification problems\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"# AdaBoost Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 39\nseed=10\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)\n\nmodel = 
AdaBoostClassifier(n_estimators=num_trees, random_state=seed)\nresults = cross_val_score(model, X, Y, cv=kfold)\n\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_ada = pd.DataFrame(output)\nreport_ada.columns = ['CLASS']\nreport_ada.index.name = \"Index\"\nreport_ada['CLASS']=report_ada['CLASS'].map({0.0:False, 1.0:True})\nreport_ada.to_csv(\"report_ada.csv\")\n\n\nprint(report_ada['CLASS'].unique())\nprint('False: ',report_ada.groupby('CLASS').size()[0].sum())\nprint('True: ',report_ada.groupby('CLASS').size()[1].sum())\n\n","execution_count":36,"outputs":[{"output_type":"stream","text":"0.7678706357477851\nMCC: 0.874418752243858\n[ True False]\nFalse: 386\nTrue: 372\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Bagging Algorithms"},{"metadata":{},"cell_type":"markdown","source":"#### Bagging is essentially random sampling with replacement. It is used with decision trees, where it significantly raises the stability of models by reducing variance and improving accuracy, which reduces the risk of overfitting."},{"metadata":{},"cell_type":"markdown","source":"## Bagged Decision Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Bagged Decision Trees for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import BaggingClassifier\nfrom sklearn.tree import DecisionTreeClassifier\n\n#split the data in portions\nX = array[:,0:11]\nY = array[:,11]\nseed = 7 #reproducibility\n\n#split according to cross validation\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)\n\n#initialize the model\ncart = DecisionTreeClassifier()\n\n#bagging\nnum_trees = 250\n\n#model\nmodel = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)\n\nresults = 
cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_bag = pd.DataFrame(output)\nreport_bag.columns = ['CLASS']\nreport_bag.index.name = \"Index\"\nreport_bag['CLASS']=report_bag['CLASS'].map({0.0:False, 1.0:True})\nreport_bag.to_csv(\"report_bag.csv\")\n\n\nprint(report_bag['CLASS'].unique())\nprint('False: ',report_bag.groupby('CLASS').size()[0].sum())\nprint('True: ',report_bag.groupby('CLASS').size()[1].sum())\n\n","execution_count":37,"outputs":[{"output_type":"stream","text":"0.8011355740837243\nMCC: 1.0\n[ True False]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Random Forest\n#### The random forest combines hundreds or thousands of decision trees, trains each one on a slightly different set of the observations, splitting nodes in each tree considering a limited number of the features. 
The final predictions of the random forest are made by averaging the predictions of each individual tree.\n"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Random Forest Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 1000\n\nmax_features = 3\n\n# shuffle=True: random_state only takes effect when shuffling, and the rows are ordered by class\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\nresults = cross_val_score(model, X, Y, cv=kfold)\nprint(results.mean())\n\nmodel.fit(X,Y)\noutput = model.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(model.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_rf = pd.DataFrame(output)\nreport_rf.columns = ['CLASS']\nreport_rf.index.name = \"Index\"\nreport_rf['CLASS']=report_rf['CLASS'].map({0.0:False, 1.0:True})\nreport_rf.to_csv(\"report_rf.csv\")\n\n\nprint(report_rf['CLASS'].unique())\nprint('False: ',report_rf.groupby('CLASS').size()[0].sum())\nprint('True: ',report_rf.groupby('CLASS').size()[1].sum())\n","execution_count":38,"outputs":[{"output_type":"stream","text":"0.8110072520409937\nMCC: 1.0\n[ True False]\nFalse: 383\nTrue: 375\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":"## Extra Trees"},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.ensemble import ExtraTreesClassifier\n\nX = array[:,0:11]\nY = array[:,11]\n\nnum_trees = 100\nmax_features = 7\n\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\n\nmodel = ExtraTreesClassifier(n_estimators=num_trees, max_features=max_features)\n\nresults = cross_val_score(model, X, Y, 
cv=kfold)\n\nprint(results.mean())\n","execution_count":39,"outputs":[{"output_type":"stream","text":"0.8090422529095015\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Voting Ensemble\n#### Ensemble methods are techniques that create multiple models and then combine them to produce improved results. Train your model using diverse algorithms and then ensemble them to predict the final output. The accuracy of the VotingClassifier is generally higher than the individual classifiers"},{"metadata":{"trusted":true},"cell_type":"code","source":"# Voting Ensemble for Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.tree import DecisionTreeClassifier\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import VotingClassifier\n\n\nX = array[:,0:11]\nY = array[:,11]\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)\n\n# create the sub models\nestimators = []\nmodel1 = LogisticRegression()\nestimators.append(('logistic', model1))\n\nmodel2 = DecisionTreeClassifier()\nestimators.append(('cart', model2))\n\nmodel3 = SVC()\nestimators.append(('svm', model3))\n\nmodel4 = XGBClassifier()\nestimators.append(('xgb', model4))\n\nmodel5 = RandomForestClassifier()\nestimators.append(('rfc', model5))\n\n# create the ensemble model\nensemble = VotingClassifier(estimators)\nresults = cross_val_score(ensemble, X, Y, cv=kfold)\nprint(results.mean())\n\n\n# fit and predict with the voting ensemble itself, not the model variable left over from an earlier cell\nensemble.fit(X,Y)\noutput = ensemble.predict(test.values)\n\nfrom sklearn.metrics import matthews_corrcoef\nmcc = matthews_corrcoef(ensemble.predict(X),Y)\nprint('MCC:',mcc)\n \nreport_v = pd.DataFrame(output)\nreport_v.columns = ['CLASS']\nreport_v.index.name = \"Index\"\nreport_v['CLASS']=report_v['CLASS'].map({0.0:False, 
1.0:True})\nreport_v.to_csv(\"report_v.csv\")\n\n\nprint(report_v['CLASS'].unique())\nprint('False: ',report_v.groupby('CLASS').size()[0].sum())\nprint('True: ',report_v.groupby('CLASS').size()[1].sum())\n\n\n","execution_count":40,"outputs":[{"output_type":"stream","text":"0.8136399166232412\nMCC: 1.0\n[ True False]\nFalse: 390\nTrue: 368\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":"## Comparing the algorithms"},{"metadata":{"trusted":true},"cell_type":"code","source":"\n# prepare models and add them to a list\nfrom matplotlib import pyplot\n\nmodels = []\nmodels.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\nmodels.append(('LDA', LinearDiscriminantAnalysis()))\nmodels.append(('KNN', KNeighborsClassifier()))\nmodels.append(('CART', DecisionTreeClassifier()))\nmodels.append(('NB', GaussianNB()))\nmodels.append(('SVM', SVC(gamma='auto')))\nmodels.append(('ETC', ExtraTreesClassifier()))\nmodels.append(('RFC', RandomForestClassifier()))\n\n# evaluate each model in turn\nresults = []\nnames = []\nscoring = 'accuracy'\n\nfor name, model in models:\n kfold = KFold(n_splits=10, shuffle=True, random_state=7)\n cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)\n results.append(cv_results)\n names.append(name)\n msg = (name, cv_results.mean(), cv_results.std())\n print(msg)\n\n# boxplot algorithm comparison\nfig = pyplot.figure()\nfig.suptitle('Algorithm Comparison')\nax = fig.add_subplot(111)\npyplot.boxplot(results)\nax.set_xticklabels(names)\npyplot.show()","execution_count":41,"outputs":[{"output_type":"stream","text":"('LR', 0.8363470557582074, 0.271811965862896)\n('LDA', 0.8535044293903076, 0.2571395669719574)\n('KNN', 0.8027933385443807, 0.2521136100771112)\n('CART', 0.7302696717040125, 0.28782426438771375)\n('NB', 0.880815746048289, 0.11642272449162755)\n('SVM', 0.7046009206183775, 0.2816909583749445)\n('ETC', 0.8396636703143999, 0.2372805087640859)\n('RFC', 0.8093679433732847, 
0.29292847209867484)\n","name":"stdout"},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAEVCAYAAAAb/KWvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGsZJREFUeJzt3X+cXXV95/HXmyEQFYSkiQXzg7AS6WRHxXZEqyBkUTfQLtTVxYy6Ao+psa0MXdCt1GEh0qb+6FqqGHWpoYiWCZEVG92w4KOOyljQDDVlEwISEMwYkEDCr0JgiJ/945wJJzd3Zu7MnPvjnHk/H4/7eNzz4577OWfuvO/3fs/3nquIwMzMyuWgZhdgZmb5c7ibmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdytKknXSPrLOm37fZJuGWP5qZKG6vHcRSfp45K+0uw6rPU53Kc5Sd+XtFvSoY16zoj4h4h4R6aGkHRco55fiQskbZb0b5KGJH1D0msaVcNkRcRfRcQfNrsOa30O92lM0iLgZCCAMxv0nAc34nnG8TngT4ELgNnAq4FvAb/XzKLG0yLHzgrC4T69fQC4HbgGOGesFSX9maSHJO2Q9IfZ1rakIyRdK2mnpAclXSLpoHTZuZJ+JOkKSbuAlem8gXT5D9On+FdJT0t6T+Y5PyLpkfR5z8vMv0bSFyXdlD7mR5KOkvS36aeQuyW9fpT9WAx8GOiKiO9FxHMR8Uz6aeJTE9yfxyXdL+nN6fztab3nVNT6ZUnflfSUpB9IOiaz/HPp456UdIekkzPLVkq6QdLXJT0JnJvO+3q6fGa67LG0lo2SfjNd9kpJ6yXtkrRN0gcrtrsu3cenJG2R1DnW39+Kx+E+vX0A+If09h9HgqGSpGXARcDbgOOAUypWuRI4Avh36bIPAOdllr8RuB94BbAq+8CIeGt693URcVhEXJ9OH5Vucx7QDayWNCvz0LOBS4A5wHPAbcC/pNM3AH8zyj6fBgxFxE9GWV7r/twJ/AZwHbAWeAPJsXk/8AVJh2XWfx/wF2ltm0iO94iNwAkknyCuA74haWZm+Vnp/hxZ8ThI3pCPABaktfwR8Gy6rA8YAl4JvBv4K0mnZR57Zlr3kcB64AtjHA8rIIf7NCXpJOAYYF1E3AHcB7x3lNXPBv4+IrZExDPAJzLbaQPeA/x5RDwVEQ8AnwX+a+bxOyLiyoh4ISKepTbDwOURMRwRG4CngeMzy2+MiDsiYg9wI7AnIq6NiL3A9UDVljtJCD402pPWuD8/j4i/zzzXgrTW5yLiFuB5kqAf8X8i4ocR8RzQC/yupAUAEfH1iHgsPTafBQ6t2M/bIuJbEfHrKsduON2f4yJib3o8nky3fRLwsYjYExGbgK9U7MNARGxI9+FrwOtGOyZWTA736esc4JaIeDSdvo7Ru2ZeCWzPTGfvzwEOAR7MzHuQpMVdbf1aPRYRL2SmnwGyreFfZe4/W2U6u+5+2wWOHuN5a9mfyuciIsZ6/n37HxFPA7tIjulI19NWSU9IepykJT6n2mOr+BpwM7A27S77jKQZ6bZ3RcRTY+zDw5n7zwAz3adfLg73aUjSS0ha46dIeljSw8CFwOskVWvBPQTMz0wvyNx/lKQFeUxm3kLgl5npVrr06D8B88foY65lfyZq3/FKu2tmAzvS/vWPkfwtZkXEkcATgDKPHfXYpZ9qPhERS4A3A79P0oW0A5gt6fAc98EKxuE+Pf0BsBdYQtLfewLQDtxKEg6V1gHnSWqX9FLg0pEF6cf6dcAqSYenJwsvAr4+gXp+RdK/XXcRcS/wRaBPyXj6Q9ITk8slXZzT/lQ6Q9JJkg4h6Xv/cURsBw4HXgB2AgdLuhR4ea0blbRU0mvSrqQnSd6U9qbb/mfgk+m+vZbkvEVln72VmMN9ejqHpA/9FxHx8MiN5KTa+yo/nkfETcDngX5gG8nJS0hOZAL0A
P9GctJ0gKSL5+oJ1LMS+Go64uPsSe7TRFxAsq+rgcdJzje8E/h2unyq+1PpOuAyku6Y3yE5wQpJl8pNwM9Iuk32MLEurKNITrY+CWwFfsCLb0JdwCKSVvyNwGUR8d0p7IMVjPxjHTZRktqBzcChFf3iVkHSNSSjcy5pdi02vbjlbjWR9M60C2MW8Gng2w52s9blcLdafYikb/g+kv76P25uOWY2FnfLmJmVkFvuZmYl5HA3Myshh7uZWQk53M3MSsjhbmZWQg53M7MScribmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdzNzErI4W5mVkIOdzOzEmrar53PmTMnFi1a1KynNzMrpDvuuOPRiJg73npNC/dFixYxODjYrKc3MyskSQ/Wsp67ZczMSsjhbmZWQg53M7MScribmZWQw93MrITGDXdJV0t6RNLmUZZL0uclbZN0p6Tfzr9MMzObiFpa7tcAy8ZYfjqwOL2tAL409bLMzGwqxg33iPghsGuMVc4Cro3E7cCRko7Oq0AzM5u4PL7ENA/YnpkeSuc9VLmipBUkrXsWLlw4pSeVVPO6ETGl55qKItQ5kRqhucfT8lOE1ya4zsnKI9yr7VHVyiPiKuAqgM7OzintXbWDI6nlgqcIdY5WS6vVafkqwmsTXOdk5TFaZghYkJmeD+zIYbuFMnv2bCTVdANqWm/27NlN3qvWUuvxzR5ns+kqj3BfD3wgHTXzJuCJiDigS2Yqag1OaF5o7t69m4jI9bZ79+7c6yyy0Y7TaMvM6qEoDblxu2Uk9QGnAnMkDQGXATMAIuLLwAbgDGAb8AxwXt5FjgRnXtyqs3ryOYxyyzuPoD6ZNG64R0TXOMsD+HBuFZkVXKv1vRbJ7Nmza/7EWmsgzpo1i127xhrwV05Nu+SvmVmlorSKi6AQ4R6XvRxWHpHv9qapibSMoLZ/jOnaMjJrZYUId33iydz73GNlbpsrFLeMzKaHQoR7EeT96WLfNs3MJsHhnpO8P13A9P6EYWZT43A3mwKP7ph+ivIp3eFuLakoJ359DmP6KcqndIe7tSSHZr6K8gmjKK3iInC4m00DRXmzLEqruAgc7mZT4JamtSqHu9kUuKVprcrhPs24pWk2PTjcpxm3NM2mhzyu525mZi3G4W5mVkIOdzOzEnK4m5mVkMPdzKyEHO5mZiXkcDczKyGPc89R3tfamDVrVq7bs/rw391akcM9JxP5YpCk3L9IVDZF+SZtrX9H/82t0Rzu1pL8TVqzqXGfu5lZCTnczcxKqDDdMnmetPIJKzMru0KEu09amZlNjLtlzMxKyOFuZlZCheiWMbPpw18Ky4fD3cxahs+v5aembhlJyyTdI2mbpIurLF8oqV/STyXdKemM/Es1M7Najdtyl9QGrAbeDgwBGyWtj4i7MqtdAqyLiC9JWgJsABbVoV4zm4SiXM7B8lNLt8yJwLaIuB9A0lrgLCAb7gGM/KWPAHbkWaSZTY0v5zD91BLu84Dtmekh4I0V66wEbpHUA7wMeFsu1ZmZ2aTU0ude7dR1ZROgC7gmIuYDZwBfk3TAtiWtkDQoaXDnzp0Tr9bMzGpSS7gPAQsy0/M5sNulG1gHEBG3ATOBOZUbioirIqIzIjrnzp07uYrNWpykA26jzc972J/ZiFrCfSOwWNKxkg4BlgPrK9b5BXAagKR2knB309ympYiY0M2sHsbtc4+IFySdD9wMtAFXR8QWSZcDgxGxHvgI8HeSLiTpsjk3/KoFRv9CRrX5PmRmlpeavsQUERtIhjdm512auX8X8JZ8SysHB7ZZ+RThW7T+hqqZ2QQU5Sc1feEwM7MScribmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdzNzErI4W5mVkIOdzOzEnK4m5mVkMPdzKyEfG0ZM2tpvrLq5Djcp6EiXNHObIQDe3Ic7tNMUa5oZ2ZT4z53M7MScribmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdzNzErI4
W5mVkKF/RKTv5JsZja6woa7A9vMbHSFDXfLz1jXmvEnIbNicribw9qshHxC1cyshBzuZmYl5HA3Myshh7uZWQk53M3MSqimcJe0TNI9krZJuniUdc6WdJekLZKuy7dMMzObiHGHQkpqA1YDbweGgI2S1kfEXZl1FgN/DrwlInZLekW9CjYzs/HV0nI/EdgWEfdHxPPAWuCsinU+CKyOiN0AEfFIvmWamdlE1BLu84DtmemhdF7Wq4FXS/qRpNslLau2IUkrJA1KGty5c+fkKjYzs3HVEu7Vvpte+ZXGg4HFwKlAF/AVSUce8KCIqyKiMyI6586dO9FazcysRrWE+xCwIDM9H9hRZZ1/jIjhiPg5cA9J2JuZWRPUEu4bgcWSjpV0CLAcWF+xzreApQCS5pB009yfZ6FmRdTX10dHRwdtbW10dHTQ19fX7JJsmhh3tExEvCDpfOBmoA24OiK2SLocGIyI9emyd0i6C9gL/PeIeKyehZu1ur6+Pnp7e1mzZg0nnXQSAwMDdHd3A9DV1dXk6ixvrfYbE2rWFQE7OztjcHCwKc9trU9S7v8A9djmWDo6OrjyyitZunTpvnn9/f309PSwefPmhtUB5TielpB0R0R0jruew91aURnCqK2tjT179jBjxox984aHh5k5cyZ79+5tWB1QjuNpiVrD3ZcfMKuT9vZ2BgYG9ps3MDBAe3t7U+qRlOtt1qxZTdkPq43D3VpW0cOot7eX7u5u+vv7GR4epr+/n+7ubnp7extaByR9vLXcJrLurl27Gr4fVjv/EpO1pIl83G/V7oGRk6Y9PT1s3bqV9vZ2Vq1a5ZOp1hDuc7fCa9VwLyIfy9bnPnczs2nM4W5mVkIOdzOzEnK4m5mVkMPdzKyEHO5mZiXkcDczKyGHu5lZCTnczcxKyOFuZlZCDnczsxJyuJuZlZDD3cyshBzuZmYl5HA3Myshh7uZWQk53M3MSsjhbmZWQg53M7MScribmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdzNzErI4W5mVkI1hbukZZLukbRN0sVjrPduSSGpM78SzcxsosYNd0ltwGrgdGAJ0CVpSZX1DgcuAH6cd5FmZjYxtbTcTwS2RcT9EfE8sBY4q8p6fwF8BtiTY31mZjYJtYT7PGB7ZnoonbePpNcDCyLiO2NtSNIKSYOSBnfu3DnhYs3MrDa1hLuqzIt9C6WDgCuAj4y3oYi4KiI6I6Jz7ty5tVdpZmYTUku4DwELMtPzgR2Z6cOBDuD7kh4A3gSs90lVM7PmqSXcNwKLJR0r6RBgObB+ZGFEPBERcyJiUUQsAm4HzoyIwbpUbGZm4xo33CPiBeB84GZgK7AuIrZIulzSmfUu0MzMJu7gWlaKiA3Ahop5l46y7qlTL8vMzKbC31A1Myshh7uZWQk53M3MSsjhbmZWQg53M7MScribmZWQw93MrIQc7mZmJeRwNzMrIYe7mVkJOdzNzErI4W5mVkIOdzOzEnK4m5mVkMPdzKyEHO5mZiXkcDczKyGHu5lZCTnczcxKyOFuZlZCDnczsxJyuJuZlZDD3cyshBzuZmYl5HA3Myshh7uZWQk53M3MSsjhbmZWQg53M7MScribmZWQw93MrIRqCndJyyTdI2mbpIurLL9I0l2S7pT0T5KOyb9UMzOr1bjhLqkNWA2cDiwBuiQtqVjtp0BnRLwWuAH4TN6FmplZ7WppuZ8IbIuI+yPieWAtcFZ2hYjoj4hn0snbgfn5lmlmZhNRS7jPA7ZnpofSeaPpBm6aSlFmZjY1B9ewjqrMi6orSu8HOoFTRlm+AlgBsHDhwhpLNDOziaql5T4ELMhMzwd2VK4k6W1AL3BmRDxXbUMRcVVEdEZE59y5cydTr5lNY319fXR0dNDW1kZHRwd9fX3NLqll1dJy3wgslnQs8EtgOfDe7AqSXg/8L2BZRDySe5VmNu319fXR29vLmjVrOOmkkxgYGKC7uxuArq6uJlfXesZtu
UfEC8D5wM3AVmBdRGyRdLmkM9PV/ho4DPiGpE2S1tetYjObllatWsWaNWtYunQpM2bMYOnSpaxZs4ZVq1Y1u7SWpIiq3ed119nZGYODg015bisXSTTrdVw2rXws29ra2LNnDzNmzNg3b3h4mJkzZ7J3794mVtZYku6IiM7x1vM3VM2sENrb2xkYGNhv3sDAAO3t7U2qqLU53M2sEHp7e+nu7qa/v5/h4WH6+/vp7u6mt7e32aW1pFpOqJqZNd3ISdOenh62bt1Ke3s7q1at8snUUbjP3QqvlfuJi8bHsvW5z90mzWOJzYrP3TK2H48lNisHt9xtPx5LbFYO7nO3/RRxLLH7ifPjY9n63Oduk+KxxGbl4HC3/XgssVk5ONwbqAijULq6uli1ahU9PT3MnDmTnp4ejyU2KyCPlmmQIo1C6erqarmazGxi3HJvEI9CMbNG8miZBiniKJSi8AiP/PhYtj6PlmkxHoViZo3kcG8Qj0Ixs0YqRbh7FIqZ2f4KP1rGo1DMzA5U+Ja7R6GYmR2o8KNlPArFPMIjPz6WrW/ajJbxKBSzyZF0wG2s+VYshQ93j0Ixm5yIqPlmxVP4E6r+XUUzswMVvs/dzP3ENp1Mmz53MzM7kMPdzKyEHO5mZiVU+BOqNn2MNSSv2jL3w9t05pa7FcZEhu452K2ZWuF6V265m5nlqFWud+WhkGZmOero6ODKK69k6dKl++b19/fT09PD5s2bp7z9WodCOtzNzHJU7+td5TrOXdIySfdI2ibp4irLD5V0fbr8x5IWTbxkM7Pia5XrXY0b7pLagNXA6cASoEvSkorVuoHdEXEccAXw6bwLNTMrgla53lUtJ1RPBLZFxP0AktYCZwF3ZdY5C1iZ3r8B+IIkhYcsmNk00yrXu6ol3OcB2zPTQ8AbR1snIl6Q9ATwG8Cj2ZUkrQBWACxcuHCSJZuZtbZW+NW1Wvrcq31zpLJFXss6RMRVEdEZEZ1z586tpT4zM5uEWsJ9CFiQmZ4P7BhtHUkHA0cAu/Io0MzMJq6WcN8ILJZ0rKRDgOXA+op11gPnpPffDXzP/e1mZs0zbp972od+PnAz0AZcHRFbJF0ODEbEemAN8DVJ20ha7MvrWbSZmY2tpssPRMQGYEPFvEsz9/cA/yXf0szMbLKa9g1VSTuBB3Pe7BwqRui0KNeZryLUWYQawXXmrR51HhMR445IaVq414OkwVq+lttsrjNfRaizCDWC68xbM+v0JX/NzErI4W5mVkJlC/erml1AjVxnvopQZxFqBNeZt6bVWao+dzMzS5St5W5mZhQ43CU9XWXeSkm/lLRJ0l2SGn7lnhrqulfSNysvmyxprqRhSR9qZI2SzkhrWpjW+YykV4yybkj6bGb6o5JW1qG+oyStlXRf+nfcIOnV6bILJe2RdERm/VMlPSHpp5LulvQ/0/nnpcd8k6TnJf2/9P6n8q45U8uox6jidXC3pC9Jatj/oKReSVsk3ZnWcJOkT1asc4Kkren9ByTdWrF8k6Sp/5zQ+LXuzfztNkm6WNKN6f1t6d97ZNmbJc2Q9Kn0tbxZ0k8knd6gGjdL+rakI9P5iyQ9W1H/Iemy0yUNStqafa3WxUR/dLhVbsDTVeatBD6a3l8MPAnMaKW60un3AA8DczPz/gS4Ffh+o2oETgPuA16VqfMXwKer7Q+wB/g5MCed/iiwMufaBNwG/FFm3gnAyen9n6TH6dzM8lOB76T3XwLcDbylYrsPjNRd52M76jGqeH0eBAwASxv0uvzd9Lgemk7PAU4B7q9Y71PA/8gcs03AgnS6PZ3e3KjX6CjL9v29K+r+amb/fhM4u1E1ps/dm95fVO0YAR3p/9tvpdMHA39Sr/oK23IfT0TcCzwDzGp2LZUi4nrgFuC9mdldwEeA+ZLm1bsGSScDfwf8XkTcl1l0NfAeSbOrPOwFkhNEF9axtKXAcER8eWRGRGyKiFslvQo4DLiE5HgdICKeJQmguh/DUdR6jA4BZ
gK7615R4mjg0Yh4DiAiHo2IHwCPS8pewvtsYG1meh1JYwSSY97XiGInQtJLgQ8CPZn9+1VErGtgGbcx/mvuz4BVEXE3JJd2iYgv1qug0oa7pN8G7o2IR5pdyyj+BfgtAEkLgKMi4ifs/89UL4cC/wj8wcgLLeNpkoD/01Eeuxp4X7ZbJGcdwB2jLBsJl1uB47PdRyMkzSL51PbDOtVXi7GO0YWSNgEPAT+LiE0NqukWYIGkn0n6oqRT0vl9pNeCkvQm4LG0YTTiBuA/p/f/E/DtBtX7kopujbH+J44DfhERTzaotv0o+bW609j/goqvytS+Op031ms7d2UM9wsl3QP8mBd/HaoVZa+Bv5wk1CFpNdX7XMEw8M8kP49YzeeBcyS9vHJB+g90LXBB/cob1XJgbUT8Gvgm+1/P6GRJd5J0d30nIh5uQn3AuMfoiog4AXgF8DJJDbnIXkQ8DfwOyY/l7ASul3Quyevt3Wnf/3IObJnvAnandW4l+TTcCM9GxAmZ2/UNet6JeEn6Rv0YMBv4bmbZfZnaP9yM4soY7ldExPEkrd9rJc1sdkGjeD3JPwskYX6upAdI3v1fJ2lxHZ/71yQfv98g6eOVCyPiceA6kvMA1fwtyRvDy+pQ2xaSENqPpNeStMi/mx6n5ez/JnhrRLwWeA3wx5JOqENtEzHmMYqIYeD/Am9tVEERsTcivh8RlwHnA++KiO0kfeunAO/ixUZG1vUkn0ZarksmtQ1YKOnwBj/vs+kb9TEk3WzjhXjV13a9lDHcAYiIbwKDvHid+ZYh6V3AO4A+SccDL4uIeRGxKCIWAZ+kzpdNjohngN8n6T6o1oL/G+BDVLlyaETsIgmB0Vr+U/E94FBJHxyZIekNwOdITkwuSm+vBOZJOqaitp+RHL+P1aG2mo13jCQJeDPJCba6k3R8RYPhBF68cF8fyQ/b3xcRQ1UefiPwGZLLfrec9LW8Bvh8ZlTK0ZLe36Dnf4LkU9pHJc0YY9W/Bj6eGfl1kKSL6lVXkcP9pZKGMrdqB+ly4KJGDjcbo64L0/63e4H3A/8hInaStD5vrNjG/6b+XTMjAbQMuETSWRXLHk3rOnSUh3+WZMRF3jUF8E7g7UqGQm4h6V47lQOP041UfxP8MvBWScfmXd8EVTtGI33um0neOOt2Qq3CYcBXlQwtvRNYwovdlt8A/j37n0jdJyKeiohPR8TzDak0UdnnPt7w1UtIupvuSodqfiudboiI+Cnwr4zRKIuIO4H/RtKo20ryGji6XjX5G6pmZiVU5Ja7mZmNwuFuZlZCDnczsxJyuJuZlZDD3cyshBzuZmYl5HA3Myshh7uZWQn9f8x1EpTZ5KE3AAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}]},{"metadata":{},"cell_type":"markdown","source":"#### Actually this should come before. It should help in choosing the best algorithm. 
When you look at the results, Naive Bayes gave the highest score (0.88), and indeed it gave me the best results."},{"metadata":{},"cell_type":"markdown","source":"# References\n#### https://machinelearningmastery.com/evaluate-performance-machine-learning-algorithms-python-using-resampling/\n#### https://www.dataquest.io/blog/top-10-machine-learning-algorithms-for-beginners/\n#### https://monkeylearn.com/blog/introduction-to-support-vector-machines-svm/\n#### https://towardsdatascience.com/understanding-random-forest-58381e0602d2\n#### https://towardsdatascience.com/ensemble-learning-in-machine-learning-getting-started-4ed85eb38e00"},{"metadata":{},"cell_type":"markdown","source":"# '''''''''''''''''''''''''''''''END''''''''''''''''''''''''''''''"},{"metadata":{},"cell_type":"markdown","source":""},{"metadata":{},"cell_type":"markdown","source":""}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4} \ No newline at end of file