diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 28d4df017..91a8f3a3b 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... 
\n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "178" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "178" ] }, { @@ -109,12 +398,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "14" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "14\n" ] }, { @@ -127,12 +427,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + " wine_df['class'].unique()\n", + "\n" ] }, { @@ -146,12 +458,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "13" ] }, { @@ -175,20 +498,306 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "cc899b59", + "execution_count": 7, + "id": "f304cef6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
01.518613-0.5622500.232053-1.1695931.9139050.8089971.034819-0.6595631.2248840.2517170.3621771.8479201.0130090
10.246290-0.499413-0.827996-2.4908470.0181450.5686480.733629-0.820719-0.544721-0.2933210.4060511.1134490.9652420
20.1968790.0212311.109334-0.2687380.0883580.8089971.215533-0.4984072.1359680.2690200.3183040.7885871.3951480
31.691550-0.3468110.487926-0.8092510.9309182.4914461.466525-0.9818751.0321551.186068-0.4275441.1840712.3345740
40.2957000.2276941.8404030.4519461.2819850.8089970.6633510.2267960.401404-0.3192760.3621770.449601-0.0378740
.............................................
1730.8762752.9745430.3051590.301803-0.332922-0.985614-1.4249001.274310-0.9301791.142811-1.392758-1.231206-0.0219522
1740.4933431.4126090.4148201.0525160.158572-0.793334-1.2843440.549108-0.3169500.969783-1.129518-1.4854450.0098932
1750.3327581.744744-0.3893550.1516611.422412-1.129824-1.3445820.549108-0.4220752.224236-1.612125-1.4854450.2805752
1760.2092320.2276940.0127320.1516611.422412-1.033684-1.3546221.354888-0.2293461.834923-1.568252-1.4006990.2964982
1771.3950861.5831651.3652081.502943-0.262708-0.392751-1.2743051.596623-0.4220751.791666-1.524378-1.428948-0.5951602
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + ".. ... ... ... ... ... \n", + "173 0.876275 2.974543 0.305159 0.301803 -0.332922 \n", + "174 0.493343 1.412609 0.414820 1.052516 0.158572 \n", + "175 0.332758 1.744744 -0.389355 0.151661 1.422412 \n", + "176 0.209232 0.227694 0.012732 0.151661 1.422412 \n", + "177 1.395086 1.583165 1.365208 1.502943 -0.262708 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + ".. ... ... ... ... \n", + "173 -0.985614 -1.424900 1.274310 -0.930179 \n", + "174 -0.793334 -1.284344 0.549108 -0.316950 \n", + "175 -1.129824 -1.344582 0.549108 -0.422075 \n", + "176 -1.033684 -1.354622 1.354888 -0.229346 \n", + "177 -0.392751 -1.274305 1.596623 -0.422075 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline class \n", + "0 0.251717 0.362177 1.847920 1.013009 0 \n", + "1 -0.293321 0.406051 1.113449 0.965242 0 \n", + "2 0.269020 0.318304 0.788587 1.395148 0 \n", + "3 1.186068 -0.427544 1.184071 2.334574 0 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 0 \n", + ".. ... ... ... ... ... 
\n", + "173 1.142811 -1.392758 -1.231206 -0.021952 2 \n", + "174 0.969783 -1.129518 -1.485445 0.009893 2 \n", + "175 2.224236 -1.612125 -1.485445 0.280575 2 \n", + "176 1.834923 -1.568252 -1.400699 0.296498 2 \n", + "177 1.791666 -1.524378 -1.428948 -0.595160 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Select predictors (excluding the last column)\n", - "predictors = wine_df.iloc[:, :-1]\n", + "predictors_standardized = wine_df.copy()\n", + "columns_to_exclude = ['class']\n", "\n", - "# Standardize the predictors\n", + "# Select the columns that we want to scale by excluding the 'class' column\n", + "# This will return the names of the numeric columns we need to scale\n", + "columns_to_scale = predictors_standardized.columns.difference(columns_to_exclude)\n", + "\n", + "# Initialize the StandardScaler to standardize the selected numeric columns\n", "scaler = StandardScaler()\n", - "predictors_standardized = pd.DataFrame(scaler.fit_transform(predictors), columns=predictors.columns)\n", "\n", - "# Display the head of the standardized predictors\n", - "print(predictors_standardized.head())" + "# Apply the scaler to the selected columns. This transforms the data so that each feature\n", + "# has a mean of 0 and a standard deviation of 1, which is essential to prevent larger\n", + "# scale features from dominating the analysis, especially for distance-based algorithms like KNN.\n", + "predictors_standardized[columns_to_scale] = scaler.fit_transform(wine_df[columns_to_scale])\n", + "\n", + "# Output the standardized dataframe with the scaled numeric columns\n", + "predictors_standardized" ] }, { @@ -204,7 +813,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "Standardizing predictor variables allows models to treat each predictor fairly and makes the coefficients easier to compare. 
Without standardization, predictors measured in large units can dominate the calculation and lead to unstable or less reliable model estimates. Standardizing also improves the numerical stability of the regression algorithm and is essential when using regularization methods like Ridge or Lasso, which penalize coefficients based on their size. " ] }, { @@ -220,7 +829,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "The 'class' variable in this case is a categorical variable, despite being stored as integers. Standardization only makes sense for continuous variables where the differences and distances have meaningful interpretations. In the case of categorical data, it represents groupings of the data as opposed to specific numerical values. " ] }, { @@ -236,7 +845,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "The specific seed value is not important; what matters is that a seed is set, because it makes the random split between the training and testing data the same on every run, which ensures reproducible results. " ] }, { @@ -251,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -261,7 +870,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "wine_train, wine_test = train_test_split(\n", + " predictors_standardized, train_size=0.75, stratify=predictors_standardized[\"class\"]\n", + ")\n", + "\n" ] }, { @@ -284,12 +896,1496 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_n_neighborsparamssplit0_test_scoresplit1_test_scoresplit2_test_scoresplit3_test_scoresplit4_test_scoresplit5_test_scoresplit6_test_scoresplit7_test_scoresplit8_test_scoresplit9_test_scoremean_test_scorestd_test_scorerank_test_score
00.0052240.0118570.0044190.0087881{'n_neighbors': 1}1.0000000.9285711.00.6923081.0000001.01.0000001.0000001.0000000.9230770.9543960.09213939
10.0009350.0000280.0012490.0000682{'n_neighbors': 2}1.0000000.9285711.00.6923081.0000001.01.0000000.9230770.9230770.9230770.9390110.08962850
20.0010570.0002470.0012770.0000613{'n_neighbors': 3}1.0000000.9285711.00.8461541.0000001.01.0000001.0000000.9230770.9230770.9620880.05121728
30.0010610.0001870.0045890.0096984{'n_neighbors': 4}0.9285710.9285711.00.8461540.9230771.01.0000001.0000000.9230771.0000000.9549450.05040635
40.0010420.0001060.0652210.1914695{'n_neighbors': 5}1.0000000.9285711.00.9230770.9230771.01.0000001.0000000.9230771.0000000.9697800.0370427
50.0013890.0008540.0016170.0004226{'n_neighbors': 6}0.9285711.0000001.00.9230770.9230771.01.0000001.0000000.9230771.0000000.9697800.0370427
60.0009900.0000610.0012860.0000587{'n_neighbors': 7}1.0000000.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9774730.0344411
70.0009890.0000450.0013570.0001008{'n_neighbors': 8}0.9285711.0000001.00.8461540.9230771.01.0000000.9230770.9230771.0000000.9543960.05071939
80.0009800.0000530.0013400.0001339{'n_neighbors': 9}1.0000000.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9697800.0370429
90.0009550.0000620.0012590.00005510{'n_neighbors': 10}1.0000000.9285711.00.8461540.9230771.00.9230770.9230771.0000001.0000000.9543960.05071939
100.0009460.0000520.0012510.00003811{'n_neighbors': 11}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
110.0009550.0000260.0012980.00010012{'n_neighbors': 12}1.0000000.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9697800.0370429
120.0009680.0000650.0012860.00005513{'n_neighbors': 13}1.0000000.9285711.01.0000000.9230771.01.0000000.9230771.0000001.0000000.9774730.0344411
130.0009250.0000130.0012420.00003614{'n_neighbors': 14}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
140.0009280.0000160.0012480.00003315{'n_neighbors': 15}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
150.0009610.0000480.0012850.00004816{'n_neighbors': 16}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
160.0009580.0000570.0012730.00003417{'n_neighbors': 17}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
170.0009520.0000390.0013010.00004718{'n_neighbors': 18}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
180.0009490.0000350.0012890.00007219{'n_neighbors': 19}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
190.0009280.0000110.0012710.00003520{'n_neighbors': 20}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9631870.04877711
200.0009530.0000360.0013070.00008821{'n_neighbors': 21}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
210.0009490.0000300.0012720.00002422{'n_neighbors': 22}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
220.0009860.0000820.0012890.00004723{'n_neighbors': 23}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
230.0009390.0000120.0012850.00003224{'n_neighbors': 24}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
240.0009490.0000340.0012900.00004125{'n_neighbors': 25}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000000.9230770.9549450.05040633
250.0009330.0000190.0012810.00002926{'n_neighbors': 26}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000001.0000000.9626370.05082313
260.0009480.0000320.0013220.00007527{'n_neighbors': 27}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000000.9230770.9549450.05040633
270.0009280.0000120.0012780.00004328{'n_neighbors': 28}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
280.0009390.0000170.0012900.00004629{'n_neighbors': 29}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
290.0009570.0000360.0013200.00006330{'n_neighbors': 30}0.8571430.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9478020.04683242
300.0009650.0000740.0013100.00004931{'n_neighbors': 31}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
310.0009270.0000070.0012630.00001332{'n_neighbors': 32}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
320.0009240.0000180.0012830.00003033{'n_neighbors': 33}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
330.0009470.0000430.0013610.00012434{'n_neighbors': 34}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9631870.04877711
340.0009290.0000050.0012640.00001035{'n_neighbors': 35}0.8571430.9285711.00.9230770.9230771.00.9230771.0000001.0000001.0000000.9554950.04843029
350.0009610.0000310.0013390.00005236{'n_neighbors': 36}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
360.0009800.0001400.0012910.00002737{'n_neighbors': 37}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
370.0009630.0000640.0013070.00004438{'n_neighbors': 38}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
380.0009530.0000530.0013130.00003939{'n_neighbors': 39}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
390.0009330.0000110.0013120.00004740{'n_neighbors': 40}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
400.0009510.0000340.0013590.00014541{'n_neighbors': 41}0.9285710.9285711.01.0000000.9230771.01.0000000.9230771.0000001.0000000.9703300.0363803
410.0009320.0000190.0012970.00002942{'n_neighbors': 42}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
420.0009510.0000480.0013150.00003843{'n_neighbors': 43}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
430.0009380.0000120.0013170.00005144{'n_neighbors': 44}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
440.0009670.0000520.0013440.00005045{'n_neighbors': 45}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
450.0009490.0000490.0012950.00001346{'n_neighbors': 46}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
460.0009620.0000500.0013300.00004247{'n_neighbors': 47}0.8571430.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9478020.04683242
470.0009360.0000120.0012970.00001848{'n_neighbors': 48}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
480.0009460.0000100.0013270.00004549{'n_neighbors': 49}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
490.0009320.0000110.0012930.00001550{'n_neighbors': 50}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
\n", + "
" + ], + "text/plain": [ + " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", + "0 0.005224 0.011857 0.004419 0.008788 \n", + "1 0.000935 0.000028 0.001249 0.000068 \n", + "2 0.001057 0.000247 0.001277 0.000061 \n", + "3 0.001061 0.000187 0.004589 0.009698 \n", + "4 0.001042 0.000106 0.065221 0.191469 \n", + "5 0.001389 0.000854 0.001617 0.000422 \n", + "6 0.000990 0.000061 0.001286 0.000058 \n", + "7 0.000989 0.000045 0.001357 0.000100 \n", + "8 0.000980 0.000053 0.001340 0.000133 \n", + "9 0.000955 0.000062 0.001259 0.000055 \n", + "10 0.000946 0.000052 0.001251 0.000038 \n", + "11 0.000955 0.000026 0.001298 0.000100 \n", + "12 0.000968 0.000065 0.001286 0.000055 \n", + "13 0.000925 0.000013 0.001242 0.000036 \n", + "14 0.000928 0.000016 0.001248 0.000033 \n", + "15 0.000961 0.000048 0.001285 0.000048 \n", + "16 0.000958 0.000057 0.001273 0.000034 \n", + "17 0.000952 0.000039 0.001301 0.000047 \n", + "18 0.000949 0.000035 0.001289 0.000072 \n", + "19 0.000928 0.000011 0.001271 0.000035 \n", + "20 0.000953 0.000036 0.001307 0.000088 \n", + "21 0.000949 0.000030 0.001272 0.000024 \n", + "22 0.000986 0.000082 0.001289 0.000047 \n", + "23 0.000939 0.000012 0.001285 0.000032 \n", + "24 0.000949 0.000034 0.001290 0.000041 \n", + "25 0.000933 0.000019 0.001281 0.000029 \n", + "26 0.000948 0.000032 0.001322 0.000075 \n", + "27 0.000928 0.000012 0.001278 0.000043 \n", + "28 0.000939 0.000017 0.001290 0.000046 \n", + "29 0.000957 0.000036 0.001320 0.000063 \n", + "30 0.000965 0.000074 0.001310 0.000049 \n", + "31 0.000927 0.000007 0.001263 0.000013 \n", + "32 0.000924 0.000018 0.001283 0.000030 \n", + "33 0.000947 0.000043 0.001361 0.000124 \n", + "34 0.000929 0.000005 0.001264 0.000010 \n", + "35 0.000961 0.000031 0.001339 0.000052 \n", + "36 0.000980 0.000140 0.001291 0.000027 \n", + "37 0.000963 0.000064 0.001307 0.000044 \n", + "38 0.000953 0.000053 0.001313 0.000039 \n", + "39 0.000933 0.000011 0.001312 0.000047 \n", + "40 0.000951 0.000034 0.001359 
0.000145 \n", + "41 0.000932 0.000019 0.001297 0.000029 \n", + "42 0.000951 0.000048 0.001315 0.000038 \n", + "43 0.000938 0.000012 0.001317 0.000051 \n", + "44 0.000967 0.000052 0.001344 0.000050 \n", + "45 0.000949 0.000049 0.001295 0.000013 \n", + "46 0.000962 0.000050 0.001330 0.000042 \n", + "47 0.000936 0.000012 0.001297 0.000018 \n", + "48 0.000946 0.000010 0.001327 0.000045 \n", + "49 0.000932 0.000011 0.001293 0.000015 \n", + "\n", + " param_n_neighbors params split0_test_score \\\n", + "0 1 {'n_neighbors': 1} 1.000000 \n", + "1 2 {'n_neighbors': 2} 1.000000 \n", + "2 3 {'n_neighbors': 3} 1.000000 \n", + "3 4 {'n_neighbors': 4} 0.928571 \n", + "4 5 {'n_neighbors': 5} 1.000000 \n", + "5 6 {'n_neighbors': 6} 0.928571 \n", + "6 7 {'n_neighbors': 7} 1.000000 \n", + "7 8 {'n_neighbors': 8} 0.928571 \n", + "8 9 {'n_neighbors': 9} 1.000000 \n", + "9 10 {'n_neighbors': 10} 1.000000 \n", + "10 11 {'n_neighbors': 11} 0.928571 \n", + "11 12 {'n_neighbors': 12} 1.000000 \n", + "12 13 {'n_neighbors': 13} 1.000000 \n", + "13 14 {'n_neighbors': 14} 0.928571 \n", + "14 15 {'n_neighbors': 15} 0.928571 \n", + "15 16 {'n_neighbors': 16} 0.928571 \n", + "16 17 {'n_neighbors': 17} 0.928571 \n", + "17 18 {'n_neighbors': 18} 0.928571 \n", + "18 19 {'n_neighbors': 19} 0.928571 \n", + "19 20 {'n_neighbors': 20} 0.857143 \n", + "20 21 {'n_neighbors': 21} 0.928571 \n", + "21 22 {'n_neighbors': 22} 0.928571 \n", + "22 23 {'n_neighbors': 23} 0.928571 \n", + "23 24 {'n_neighbors': 24} 0.928571 \n", + "24 25 {'n_neighbors': 25} 0.928571 \n", + "25 26 {'n_neighbors': 26} 0.928571 \n", + "26 27 {'n_neighbors': 27} 0.928571 \n", + "27 28 {'n_neighbors': 28} 0.928571 \n", + "28 29 {'n_neighbors': 29} 0.928571 \n", + "29 30 {'n_neighbors': 30} 0.857143 \n", + "30 31 {'n_neighbors': 31} 0.857143 \n", + "31 32 {'n_neighbors': 32} 0.857143 \n", + "32 33 {'n_neighbors': 33} 0.857143 \n", + "33 34 {'n_neighbors': 34} 0.857143 \n", + "34 35 {'n_neighbors': 35} 0.857143 \n", + "35 36 
{'n_neighbors': 36} 0.928571 \n", + "36 37 {'n_neighbors': 37} 0.928571 \n", + "37 38 {'n_neighbors': 38} 0.928571 \n", + "38 39 {'n_neighbors': 39} 0.928571 \n", + "39 40 {'n_neighbors': 40} 0.928571 \n", + "40 41 {'n_neighbors': 41} 0.928571 \n", + "41 42 {'n_neighbors': 42} 0.928571 \n", + "42 43 {'n_neighbors': 43} 0.928571 \n", + "43 44 {'n_neighbors': 44} 0.928571 \n", + "44 45 {'n_neighbors': 45} 0.928571 \n", + "45 46 {'n_neighbors': 46} 0.928571 \n", + "46 47 {'n_neighbors': 47} 0.857143 \n", + "47 48 {'n_neighbors': 48} 0.928571 \n", + "48 49 {'n_neighbors': 49} 0.928571 \n", + "49 50 {'n_neighbors': 50} 0.928571 \n", + "\n", + " split1_test_score split2_test_score split3_test_score \\\n", + "0 0.928571 1.0 0.692308 \n", + "1 0.928571 1.0 0.692308 \n", + "2 0.928571 1.0 0.846154 \n", + "3 0.928571 1.0 0.846154 \n", + "4 0.928571 1.0 0.923077 \n", + "5 1.000000 1.0 0.923077 \n", + "6 0.928571 1.0 0.923077 \n", + "7 1.000000 1.0 0.846154 \n", + "8 0.928571 1.0 0.923077 \n", + "9 0.928571 1.0 0.846154 \n", + "10 0.928571 1.0 0.923077 \n", + "11 0.928571 1.0 0.923077 \n", + "12 0.928571 1.0 1.000000 \n", + "13 0.928571 1.0 0.923077 \n", + "14 0.928571 1.0 0.923077 \n", + "15 0.928571 1.0 0.923077 \n", + "16 0.928571 1.0 0.923077 \n", + "17 0.928571 1.0 0.923077 \n", + "18 0.928571 1.0 0.923077 \n", + "19 0.928571 1.0 0.923077 \n", + "20 0.928571 1.0 0.923077 \n", + "21 0.928571 1.0 0.923077 \n", + "22 0.928571 1.0 0.923077 \n", + "23 0.928571 1.0 0.923077 \n", + "24 0.928571 1.0 0.846154 \n", + "25 0.928571 1.0 0.846154 \n", + "26 0.928571 1.0 0.846154 \n", + "27 0.928571 1.0 0.923077 \n", + "28 0.928571 1.0 0.923077 \n", + "29 0.928571 1.0 0.923077 \n", + "30 0.928571 1.0 0.923077 \n", + "31 0.928571 1.0 0.923077 \n", + "32 0.928571 1.0 0.923077 \n", + "33 0.928571 1.0 0.923077 \n", + "34 0.928571 1.0 0.923077 \n", + "35 0.928571 1.0 0.923077 \n", + "36 0.928571 1.0 0.923077 \n", + "37 0.928571 1.0 0.923077 \n", + "38 0.928571 1.0 0.923077 \n", + "39 
0.928571 1.0 0.923077 \n", + "40 0.928571 1.0 1.000000 \n", + "41 0.928571 1.0 0.923077 \n", + "42 0.928571 1.0 0.923077 \n", + "43 0.928571 1.0 0.846154 \n", + "44 0.928571 1.0 0.846154 \n", + "45 0.928571 1.0 0.846154 \n", + "46 0.928571 1.0 0.923077 \n", + "47 0.928571 1.0 0.923077 \n", + "48 0.928571 1.0 0.923077 \n", + "49 0.928571 1.0 0.923077 \n", + "\n", + " split4_test_score split5_test_score split6_test_score \\\n", + "0 1.000000 1.0 1.000000 \n", + "1 1.000000 1.0 1.000000 \n", + "2 1.000000 1.0 1.000000 \n", + "3 0.923077 1.0 1.000000 \n", + "4 0.923077 1.0 1.000000 \n", + "5 0.923077 1.0 1.000000 \n", + "6 0.923077 1.0 1.000000 \n", + "7 0.923077 1.0 1.000000 \n", + "8 0.923077 1.0 1.000000 \n", + "9 0.923077 1.0 0.923077 \n", + "10 0.923077 1.0 1.000000 \n", + "11 0.923077 1.0 1.000000 \n", + "12 0.923077 1.0 1.000000 \n", + "13 0.923077 1.0 1.000000 \n", + "14 0.923077 1.0 1.000000 \n", + "15 0.923077 1.0 1.000000 \n", + "16 0.923077 1.0 1.000000 \n", + "17 0.923077 1.0 1.000000 \n", + "18 0.923077 1.0 1.000000 \n", + "19 0.923077 1.0 1.000000 \n", + "20 0.923077 1.0 1.000000 \n", + "21 0.923077 1.0 1.000000 \n", + "22 0.923077 1.0 1.000000 \n", + "23 0.923077 1.0 1.000000 \n", + "24 0.923077 1.0 1.000000 \n", + "25 0.923077 1.0 1.000000 \n", + "26 0.923077 1.0 1.000000 \n", + "27 0.923077 1.0 1.000000 \n", + "28 0.923077 1.0 1.000000 \n", + "29 0.923077 1.0 1.000000 \n", + "30 0.923077 1.0 1.000000 \n", + "31 0.923077 1.0 1.000000 \n", + "32 0.923077 1.0 1.000000 \n", + "33 0.923077 1.0 1.000000 \n", + "34 0.923077 1.0 0.923077 \n", + "35 0.923077 1.0 1.000000 \n", + "36 0.923077 1.0 1.000000 \n", + "37 0.923077 1.0 1.000000 \n", + "38 0.923077 1.0 1.000000 \n", + "39 0.923077 1.0 1.000000 \n", + "40 0.923077 1.0 1.000000 \n", + "41 0.923077 1.0 1.000000 \n", + "42 0.923077 1.0 1.000000 \n", + "43 0.923077 1.0 1.000000 \n", + "44 0.923077 1.0 1.000000 \n", + "45 0.923077 1.0 1.000000 \n", + "46 0.923077 1.0 1.000000 \n", + "47 0.923077 1.0 1.000000 
\n", + "48 0.923077 1.0 1.000000 \n", + "49 0.923077 1.0 1.000000 \n", + "\n", + " split7_test_score split8_test_score split9_test_score mean_test_score \\\n", + "0 1.000000 1.000000 0.923077 0.954396 \n", + "1 0.923077 0.923077 0.923077 0.939011 \n", + "2 1.000000 0.923077 0.923077 0.962088 \n", + "3 1.000000 0.923077 1.000000 0.954945 \n", + "4 1.000000 0.923077 1.000000 0.969780 \n", + "5 1.000000 0.923077 1.000000 0.969780 \n", + "6 1.000000 1.000000 1.000000 0.977473 \n", + "7 0.923077 0.923077 1.000000 0.954396 \n", + "8 0.923077 1.000000 1.000000 0.969780 \n", + "9 0.923077 1.000000 1.000000 0.954396 \n", + "10 0.923077 1.000000 1.000000 0.962637 \n", + "11 0.923077 1.000000 1.000000 0.969780 \n", + "12 0.923077 1.000000 1.000000 0.977473 \n", + "13 0.923077 1.000000 1.000000 0.962637 \n", + "14 0.923077 1.000000 1.000000 0.962637 \n", + "15 0.923077 1.000000 1.000000 0.962637 \n", + "16 0.923077 1.000000 1.000000 0.962637 \n", + "17 0.923077 1.000000 1.000000 0.962637 \n", + "18 0.923077 1.000000 1.000000 0.962637 \n", + "19 1.000000 1.000000 1.000000 0.963187 \n", + "20 1.000000 1.000000 1.000000 0.970330 \n", + "21 1.000000 1.000000 1.000000 0.970330 \n", + "22 1.000000 1.000000 0.923077 0.962637 \n", + "23 1.000000 1.000000 1.000000 0.970330 \n", + "24 1.000000 1.000000 0.923077 0.954945 \n", + "25 1.000000 1.000000 1.000000 0.962637 \n", + "26 1.000000 1.000000 0.923077 0.954945 \n", + "27 1.000000 1.000000 0.923077 0.962637 \n", + "28 1.000000 1.000000 0.923077 0.962637 \n", + "29 0.923077 1.000000 0.923077 0.947802 \n", + "30 1.000000 1.000000 0.923077 0.955495 \n", + "31 1.000000 1.000000 0.923077 0.955495 \n", + "32 1.000000 1.000000 0.923077 0.955495 \n", + "33 1.000000 1.000000 1.000000 0.963187 \n", + "34 1.000000 1.000000 1.000000 0.955495 \n", + "35 0.923077 1.000000 1.000000 0.962637 \n", + "36 0.923077 1.000000 1.000000 0.962637 \n", + "37 0.923077 1.000000 1.000000 0.962637 \n", + "38 0.923077 1.000000 1.000000 0.962637 \n", + "39 0.923077 
1.000000 0.923077 0.954945 \n", + "40 0.923077 1.000000 1.000000 0.970330 \n", + "41 0.923077 1.000000 0.923077 0.954945 \n", + "42 0.923077 1.000000 0.923077 0.954945 \n", + "43 0.923077 1.000000 0.923077 0.947253 \n", + "44 0.923077 1.000000 0.923077 0.947253 \n", + "45 0.923077 1.000000 0.923077 0.947253 \n", + "46 0.923077 1.000000 0.923077 0.947802 \n", + "47 0.923077 0.923077 0.923077 0.947253 \n", + "48 0.923077 0.923077 0.923077 0.947253 \n", + "49 0.923077 0.923077 0.923077 0.947253 \n", + "\n", + " std_test_score rank_test_score \n", + "0 0.092139 39 \n", + "1 0.089628 50 \n", + "2 0.051217 28 \n", + "3 0.050406 35 \n", + "4 0.037042 7 \n", + "5 0.037042 7 \n", + "6 0.034441 1 \n", + "7 0.050719 39 \n", + "8 0.037042 9 \n", + "9 0.050719 39 \n", + "10 0.037411 17 \n", + "11 0.037042 9 \n", + "12 0.034441 1 \n", + "13 0.037411 17 \n", + "14 0.037411 17 \n", + "15 0.037411 17 \n", + "16 0.037411 17 \n", + "17 0.037411 17 \n", + "18 0.037411 17 \n", + "19 0.048777 11 \n", + "20 0.036380 3 \n", + "21 0.036380 3 \n", + "22 0.037411 13 \n", + "23 0.036380 3 \n", + "24 0.050406 33 \n", + "25 0.050823 13 \n", + "26 0.050406 33 \n", + "27 0.037411 13 \n", + "28 0.037411 13 \n", + "29 0.046832 42 \n", + "30 0.048430 29 \n", + "31 0.048430 29 \n", + "32 0.048430 29 \n", + "33 0.048777 11 \n", + "34 0.048430 29 \n", + "35 0.037411 17 \n", + "36 0.037411 17 \n", + "37 0.037411 17 \n", + "38 0.037411 17 \n", + "39 0.036842 35 \n", + "40 0.036380 3 \n", + "41 0.036842 35 \n", + "42 0.036842 35 \n", + "43 0.048787 44 \n", + "44 0.048787 44 \n", + "45 0.048787 44 \n", + "46 0.046832 42 \n", + "47 0.034594 44 \n", + "48 0.034594 44 \n", + "49 0.034594 44 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." 
+ "knn = KNeighborsClassifier()\n", + "\n", + "parameter_grid = {\n", + " \"n_neighbors\": range(1,51)\n", + "}\n", + "\n", + "wine_tune_grid = GridSearchCV(\n", + " estimator=knn,\n", + " param_grid=parameter_grid,\n", + " cv=10\n", + ")\n", + "\n", + "X_train = wine_train.iloc[:, :-1] # all columns except last\n", + "y_train = wine_train['class'] \n", + "\n", + "wine_tune_grid.fit(X_train, y_train)\n", + "\n", + "accuracy_grid = pd.DataFrame(wine_tune_grid.cv_results_)\n", + "accuracy_grid\n" ] }, { @@ -305,12 +2401,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9411764705882353" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "kvalue = wine_tune_grid.best_params_[\"n_neighbors\"]\n", + "\n", + "wine_subtrain, wine_validation = train_test_split(\n", + " wine_train,\n", + " train_size=0.75,\n", + " stratify=wine_train['class']\n", + ")\n", + "\n", + "X_sub = wine_subtrain.iloc[:, :-1]\n", + "y_sub = wine_subtrain['class']\n", + "\n", + "X_val = wine_validation.iloc[:, :-1]\n", + "y_val = wine_validation['class']\n", + "\n", + "# 3. Fit final KNN using best k\n", + "final_knn = KNeighborsClassifier(n_neighbors=kvalue)\n", + "final_knn.fit(X_sub, y_sub)\n", + "\n", + "# 4. Predict on validation set\n", + "val_predictions = final_knn.predict(X_val)\n", + "\n", + "# 5. 
Accuracy on validation set\n", + "val_accuracy = accuracy_score(y_val, val_predictions)\n", + "val_accuracy\n", + "\n" ] }, { @@ -365,7 +2496,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -379,12 +2510,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.1" } }, "nbformat": 4,