{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"junk property Classification Industry level.ipynb","provenance":[{"file_id":"1WLR8rPLjxiTQNyuV4uXBfVx1Ni4LSXBp","timestamp":1605693223282},{"file_id":"1lPNcI9eIO_givFo1Rwd0-05JP1PY9odj","timestamp":1603705018259}],"collapsed_sections":[],"authorship_tag":"ABX9TyNXcmcr6z/af6vlpc1SM+F7"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"0buwUZkZwbqa","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793177512,"user_tz":-330,"elapsed":1251,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"77d1f38c-709e-446b-fc44-41e1bba722c3"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":43,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"--TudwOjzdHJ","colab":{"base_uri":"https://localhost:8080/","height":226},"executionInfo":{"status":"ok","timestamp":1605793178105,"user_tz":-330,"elapsed":1602,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"7aa99f1e-ff92-4fc0-815d-a08889f29eea"},"source":["import pandas as pd\n","import numpy as np\n","import warnings \n","warnings.filterwarnings(\"ignore\")\n","import pandas_profiling as pp\n","from sklearn.preprocessing import LabelBinarizer\n","from sklearn.ensemble import RandomForestClassifier\n","from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold\n","from sklearn.metrics import roc_curve, precision_recall_curve, auc, make_scorer, recall_score, accuracy_score, precision_score, confusion_matrix, fbeta_score, roc_auc_score\n","from sklearn.model_selection import train_test_split\n","from sklearn.metrics import classification_report as cr\n","\n","import matplotlib.pyplot as plt\n","plt.style.use(\"ggplot\")\n","df = pd.read_csv('/content/drive/My Drive/collab projects/Classification Project Industry Dataset/Property_train.csv', parse_dates=True)\n","df.head()"],"execution_count":44,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Junk</th>\n","      <th>InteriorsStyle</th>\n","      <th>PriceIndex8</th>\n","      <th>ListDate</th>\n","      <th>Material</th>\n","      <th>PriceIndex9</th>\n","      <th>Agency</th>\n","      <th>AreaIncomeType</th>\n","      <th>EnvRating</th>\n","      <th>PriceIndex7</th>\n","      <th>ExpeditedListing</th>\n","      <th>PriceIndex4</th>\n","      <th>PriceIndex1</th>\n","      <th>PriceIndex6</th>\n","      <th>PRIMEUNIT</th>\n","      <th>Channel</th>\n","      <th>Zip</th>\n","      <th>InsurancePremiumIndex</th>\n","      <th>PlotType</th>\n","      <th>Architecture</th>\n","      <th>PriceIndex3</th>\n","      <th>Region</th>\n","      <th>PriceIndex5</th>\n","      <th>SubModel</th>\n","      <th>Facade</th>\n","      <th>State</th>\n","      <th>NormalisedPopulation</th>\n","      <th>BuildYear</th>\n","      <th>RegionType</th>\n","      <th>PropertyAge</th>\n","      <th>PriceIndex2</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","      <td>6LLJ</td>\n","      <td>14674.0</td>\n","      <td>9/7/2010</td>\n","      <td>UT7W</td>\n","      <td>8270</td>\n","      <td>CAT3</td>\n","      <td>B</td>\n","      <td>missing</td>\n","      <td>13143.0</td>\n","      <td>0</td>\n","      <td>14224.0</td>\n","      <td>9217.0</td>\n","      <td>10387.0</td>\n","      <td>missing</td>\n","      <td>Direct</td>\n","      <td>21075</td>\n","      <td>623</td>\n","      <td>WHBI</td>\n","      <td>I3Z9</td>\n","      <td>13108.0</td>\n","      <td>A</td>\n","      <td>9022.0</td>\n","      <td>6E9G</td>\n","      <td>GREEN</td>\n","      <td>MD</td>\n","      <td>42077</td>\n","      <td>2008</td>\n","      <td>A</td>\n","      <td>2</td>\n","      <td>10692.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>XZ9F</td>\n","      <td>4172.0</td>\n","      <td>1/7/2009</td>\n","      <td>PRN0</td>\n","      <td>3890</td>\n","      <td>CAT2</td>\n","      <td>B</td>\n","      <td>missing</td>\n","      <td>3461.0</td>\n","      <td>0</td>\n","      <td>4404.0</td>\n","      <td>2958.0</td>\n","      <td>3400.0</td>\n","      <td>missing</td>\n","      <td>Direct</td>\n","      <td>73129</td>\n","      <td>1689</td>\n","      <td>WHBI</td>\n","      <td>AVYP</td>\n","      <td>3695.0</td>\n","      <td>A</td>\n","      <td>2742.0</td>\n","      <td>FIVQ</td>\n","      <td>SILVER</td>\n","      <td>OK</td>\n","      <td>77258</td>\n","      <td>2002</td>\n","      <td>A</td>\n","      <td>7</td>\n","      <td>3615.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0</td>\n","      <td>QWEV</td>\n","      <td>8210.0</td>\n","      <td>6/23/2010</td>\n","      <td>A9RA</td>\n","      <td>5900</td>\n","      <td>CAT1</td>\n","      <td>B</td>\n","      <td>missing</td>\n","      <td>6922.0</td>\n","      <td>0</td>\n","      <td>8952.0</td>\n","      <td>4568.0</td>\n","      <td>4862.0</td>\n","      <td>missing</td>\n","      <td>Other</td>\n","      <td>29697</td>\n","      <td>2351</td>\n","      <td>W62B</td>\n","      <td>1M5X</td>\n","      <td>7730.0</td>\n","      <td>A</td>\n","      <td>3768.0</td>\n","      <td>4K0H</td>\n","      <td>SILVER</td>\n","      <td>SC</td>\n","      <td>94514</td>\n","      <td>2004</td>\n","      <td>A</td>\n","      <td>6</td>\n","      <td>5805.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0</td>\n","      <td>MK2K</td>\n","      <td>7309.0</td>\n","      <td>3/19/2009</td>\n","      <td>RRJQ</td>\n","      <td>7460</td>\n","      <td>OTHER</td>\n","      <td>A</td>\n","      <td>missing</td>\n","      <td>6300.0</td>\n","      <td>0</td>\n","      <td>7460.0</td>\n","      <td>5361.0</td>\n","      <td>6305.0</td>\n","      <td>missing</td>\n","      <td>Agent</td>\n","      <td>28273</td>\n","      <td>1933</td>\n","      <td>WLQ6</td>\n","      <td>DKOO</td>\n","      <td>6290.0</td>\n","      <td>A</td>\n","      <td>5370.0</td>\n","      <td>ZLXY</td>\n","      <td>WHITE</td>\n","      <td>NC</td>\n","      <td>82302</td>\n","      <td>2002</td>\n","      <td>C</td>\n","      <td>7</td>\n","      <td>6444.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0</td>\n","      <td>GK4G</td>\n","      <td>9182.0</td>\n","      <td>3/3/2010</td>\n","      <td>HE5A</td>\n","      <td>6400</td>\n","      <td>OTHER</td>\n","      <td>B</td>\n","      <td>missing</td>\n","      <td>8330.0</td>\n","      <td>0</td>\n","      <td>9846.0</td>\n","      <td>5428.0</td>\n","      <td>5718.0</td>\n","      <td>missing</td>\n","      <td>Other</td>\n","      <td>29697</td>\n","      <td>482</td>\n","      <td>WHBI</td>\n","      <td>KML6</td>\n","      <td>8583.0</td>\n","      <td>A</td>\n","      <td>4803.0</td>\n","      <td>FIVQ</td>\n","      <td>SILVER</td>\n","      <td>SC</td>\n","      <td>49176</td>\n","      <td>2007</td>\n","      <td>B</td>\n","      <td>3</td>\n","      <td>6286.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   Junk InteriorsStyle PriceIndex8  ... RegionType PropertyAge  PriceIndex2\n","0     0           6LLJ     14674.0  ...          A           2      10692.0\n","1     1           XZ9F      4172.0  ...          A           7       3615.0\n","2     0           QWEV      8210.0  ...          A           6       5805.0\n","3     0           MK2K      7309.0  ...          C           7       6444.0\n","4     0           GK4G      9182.0  ...          B           3       6286.0\n","\n","[5 rows x 31 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"id":"g0e6tw4uzdKc","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793178107,"user_tz":-330,"elapsed":1376,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"475d451d-c1f6-4d5e-d1df-85ef90207a78"},"source":["print(df.shape)"],"execution_count":45,"outputs":[{"output_type":"stream","text":["(62035, 31)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Ao0qhZOd0blO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793178110,"user_tz":-330,"elapsed":723,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"bbb99cc4-2479-435b-d787-bdcc2962c92e"},"source":["df.columns"],"execution_count":46,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['Junk', 'InteriorsStyle', 'PriceIndex8', 'ListDate', 'Material',\n","       'PriceIndex9', 'Agency', 'AreaIncomeType', 'EnvRating', 'PriceIndex7',\n","       'ExpeditedListing', 'PriceIndex4', 'PriceIndex1', 'PriceIndex6',\n","       'PRIMEUNIT', 'Channel', 'Zip', 'InsurancePremiumIndex', 'PlotType',\n","       'Architecture', 'PriceIndex3', 'Region', 'PriceIndex5', 'SubModel',\n","       'Facade', 'State', 'NormalisedPopulation', 'BuildYear', 'RegionType',\n","       'PropertyAge', 'PriceIndex2'],\n","      dtype='object')"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"id":"hkhrRWmL07sd","colab":{"base_uri":"https://localhost:8080/","height":300},"executionInfo":{"status":"ok","timestamp":1605793178844,"user_tz":-330,"elapsed":1172,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"0a57e2c7-2636-4ef0-be3d-3454c0b7daa2"},"source":["df.describe()"],"execution_count":47,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Junk</th>\n","      <th>PriceIndex9</th>\n","      <th>ExpeditedListing</th>\n","      <th>Zip</th>\n","      <th>InsurancePremiumIndex</th>\n","      <th>NormalisedPopulation</th>\n","      <th>BuildYear</th>\n","      <th>PropertyAge</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>count</th>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.000000</td>\n","      <td>62035.00000</td>\n","    </tr>\n","    <tr>\n","      <th>mean</th>\n","      <td>0.122544</td>\n","      <td>6731.444765</td>\n","      <td>0.024680</td>\n","      <td>58038.149109</td>\n","      <td>1277.286854</td>\n","      <td>71462.694302</td>\n","      <td>2005.347739</td>\n","      <td>4.17171</td>\n","    </tr>\n","    <tr>\n","      <th>std</th>\n","      <td>0.327915</td>\n","      <td>1766.130806</td>\n","      <td>0.155148</td>\n","      <td>26143.635828</td>\n","      <td>600.787708</td>\n","      <td>14583.079578</td>\n","      <td>1.730014</td>\n","      <td>1.70878</td>\n","    </tr>\n","    <tr>\n","      <th>min</th>\n","      <td>0.000000</td>\n","      <td>225.000000</td>\n","      <td>0.000000</td>\n","      <td>2764.000000</td>\n","      <td>462.000000</td>\n","      <td>4825.000000</td>\n","      <td>2001.000000</td>\n","      <td>0.00000</td>\n","    </tr>\n","    <tr>\n","      <th>25%</th>\n","      <td>0.000000</td>\n","      <td>5430.000000</td>\n","      <td>0.000000</td>\n","      <td>32124.000000</td>\n","      <td>837.000000</td>\n","      <td>61803.500000</td>\n","      <td>2004.000000</td>\n","      <td>3.00000</td>\n","    </tr>\n","    <tr>\n","      <th>50%</th>\n","      <td>0.000000</td>\n","      <td>6705.000000</td>\n","      <td>0.000000</td>\n","      <td>73108.000000</td>\n","      <td>1155.000000</td>\n","      <td>73361.000000</td>\n","      <td>2005.000000</td>\n","      <td>4.00000</td>\n","    </tr>\n","    <tr>\n","      <th>75%</th>\n","      <td>0.000000</td>\n","      <td>7900.000000</td>\n","      <td>0.000000</td>\n","      <td>80022.000000</td>\n","      <td>1623.000000</td>\n","      <td>82370.500000</td>\n","      <td>2007.000000</td>\n","      <td>5.00000</td>\n","    </tr>\n","    <tr>\n","      <th>max</th>\n","      <td>1.000000</td>\n","      <td>38785.000000</td>\n","      <td>1.000000</td>\n","      <td>99224.000000</td>\n","      <td>7498.000000</td>\n","      <td>115717.000000</td>\n","      <td>2010.000000</td>\n","      <td>9.00000</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["               Junk   PriceIndex9  ...     BuildYear  PropertyAge\n","count  62035.000000  62035.000000  ...  62035.000000  62035.00000\n","mean       0.122544   6731.444765  ...   2005.347739      4.17171\n","std        0.327915   1766.130806  ...      1.730014      1.70878\n","min        0.000000    225.000000  ...   2001.000000      0.00000\n","25%        0.000000   5430.000000  ...   2004.000000      3.00000\n","50%        0.000000   6705.000000  ...   2005.000000      4.00000\n","75%        0.000000   7900.000000  ...   2007.000000      5.00000\n","max        1.000000  38785.000000  ...   2010.000000      9.00000\n","\n","[8 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"code","metadata":{"id":"vRAu5srO1Zv7","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793179291,"user_tz":-330,"elapsed":1344,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"4e14f16e-2176-4caa-9431-884474d41f5b"},"source":["list(zip(df.columns,df.dtypes,df.nunique()))"],"execution_count":48,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[('Junk', dtype('int64'), 2),\n"," ('InteriorsStyle', dtype('O'), 1036),\n"," ('PriceIndex8', dtype('O'), 12686),\n"," ('ListDate', dtype('O'), 516),\n"," ('Material', dtype('O'), 134),\n"," ('PriceIndex9', dtype('int64'), 2019),\n"," ('Agency', dtype('O'), 5),\n"," ('AreaIncomeType', dtype('O'), 4),\n"," ('EnvRating', dtype('O'), 3),\n"," ('PriceIndex7', dtype('O'), 12078),\n"," ('ExpeditedListing', dtype('int64'), 2),\n"," ('PriceIndex4', dtype('O'), 13006),\n"," ('PriceIndex1', dtype('O'), 10053),\n"," ('PriceIndex6', dtype('O'), 10863),\n"," ('PRIMEUNIT', dtype('O'), 3),\n"," ('Channel', dtype('O'), 3),\n"," ('Zip', dtype('int64'), 152),\n"," ('InsurancePremiumIndex', dtype('int64'), 277),\n"," ('PlotType', dtype('O'), 13),\n"," ('Architecture', dtype('O'), 33),\n"," ('PriceIndex3', dtype('O'), 12309),\n"," ('Region', dtype('O'), 3),\n"," ('PriceIndex5', dtype('O'), 10011),\n"," ('SubModel', dtype('O'), 831),\n"," ('Facade', dtype('O'), 17),\n"," ('State', dtype('O'), 37),\n"," ('NormalisedPopulation', dtype('int64'), 36675),\n"," ('BuildYear', dtype('int64'), 10),\n"," ('RegionType', dtype('O'), 5),\n"," ('PropertyAge', dtype('int64'), 10),\n"," ('PriceIndex2', dtype('O'), 11010)]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"id":"JDTu0U8RAcbC","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793184835,"user_tz":-330,"elapsed":856,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"13ebe2b3-90f2-429a-c238-9388fd450684"},"source":["cutoff = (5/100)* df.shape[0]\n","cutoff"],"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3101.75"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"code","metadata":{"id":"UJlX9NLaAyMJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793193909,"user_tz":-330,"elapsed":834,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"16de346f-211c-4ec9-a2f8-1b14815780b4"},"source":["numcol = [\"PriceIndex5\",\"PriceIndex7\",\"PriceIndex4\",\"PriceIndex1\",\"PriceIndex6\",\"PriceIndex2\",\"PriceIndex3\",\"PriceIndex8\", \"PriceIndex9\"]\n","for col in numcol:\n","  df[col] = pd.to_numeric(df[col], errors=\"coerce\")\n","list(zip(df.columns,df.dtypes,df.nunique()))"],"execution_count":50,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[('Junk', dtype('int64'), 2),\n"," ('InteriorsStyle', dtype('O'), 1036),\n"," ('PriceIndex8', dtype('float64'), 12685),\n"," ('ListDate', dtype('O'), 516),\n"," ('Material', dtype('O'), 134),\n"," ('PriceIndex9', dtype('int64'), 2019),\n"," ('Agency', dtype('O'), 5),\n"," ('AreaIncomeType', dtype('O'), 4),\n"," ('EnvRating', dtype('O'), 3),\n"," ('PriceIndex7', dtype('float64'), 12077),\n"," ('ExpeditedListing', dtype('int64'), 2),\n"," ('PriceIndex4', dtype('float64'), 13005),\n"," ('PriceIndex1', dtype('float64'), 10052),\n"," ('PriceIndex6', dtype('float64'), 10862),\n"," ('PRIMEUNIT', dtype('O'), 3),\n"," ('Channel', dtype('O'), 3),\n"," ('Zip', dtype('int64'), 152),\n"," ('InsurancePremiumIndex', dtype('int64'), 277),\n"," ('PlotType', dtype('O'), 13),\n"," ('Architecture', dtype('O'), 33),\n"," ('PriceIndex3', dtype('float64'), 12308),\n"," ('Region', dtype('O'), 3),\n"," ('PriceIndex5', dtype('float64'), 10010),\n"," ('SubModel', dtype('O'), 831),\n"," ('Facade', dtype('O'), 17),\n"," ('State', dtype('O'), 37),\n"," ('NormalisedPopulation', dtype('int64'), 36675),\n"," ('BuildYear', dtype('int64'), 10),\n"," ('RegionType', dtype('O'), 5),\n"," ('PropertyAge', dtype('int64'), 10),\n"," ('PriceIndex2', dtype('float64'), 11009)]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"code","metadata":{"id":"i7Yzv4StCT2I","colab":{"base_uri":"https://localhost:8080/","height":506},"executionInfo":{"status":"ok","timestamp":1605781243120,"user_tz":-330,"elapsed":1445,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"ddd28fa3-aa79-42f9-b3b0-4792fcd3997b"},"source":["cols = df.select_dtypes([\"object\"])\n","for colname in cols:\n","  cats = df[colname].value_counts()\n","  keepcat = cats.index[cats >= cutoff]\n","  for cat in keepcat:\n","    if cat == \"missing\":\n","      continue\n","    name=colname+'_'+cat\n","    df[name] = (df[colname]==cat).astype(int)\n","  del df[colname]\n","  print(colname)\n","df.head()"],"execution_count":27,"outputs":[{"output_type":"stream","text":["InteriorsStyle\n","ListDate\n","Material\n","Agency\n","AreaIncomeType\n","EnvRating\n","PRIMEUNIT\n","Channel\n","PlotType\n","Architecture\n","Region\n","SubModel\n","Facade\n","State\n","RegionType\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Junk</th>\n","      <th>PriceIndex8</th>\n","      <th>PriceIndex9</th>\n","      <th>PriceIndex7</th>\n","      <th>ExpeditedListing</th>\n","      <th>PriceIndex4</th>\n","      <th>PriceIndex1</th>\n","      <th>PriceIndex6</th>\n","      <th>Zip</th>\n","      <th>InsurancePremiumIndex</th>\n","      <th>PriceIndex3</th>\n","      <th>PriceIndex5</th>\n","      <th>NormalisedPopulation</th>\n","      <th>BuildYear</th>\n","      <th>PropertyAge</th>\n","      <th>PriceIndex2</th>\n","      <th>Material_PRN0</th>\n","      <th>Material_C8A4</th>\n","      <th>Material_ZCI3</th>\n","      <th>Material_NYFD</th>\n","      <th>Agency_CAT1</th>\n","      <th>Agency_CAT2</th>\n","      <th>Agency_CAT3</th>\n","      <th>Agency_OTHER</th>\n","      <th>AreaIncomeType_A</th>\n","      <th>AreaIncomeType_B</th>\n","      <th>Channel_Direct</th>\n","      <th>Channel_Other</th>\n","      <th>Channel_Agent</th>\n","      <th>PlotType_WHBI</th>\n","      <th>PlotType_W62B</th>\n","      <th>PlotType_32T2</th>\n","      <th>PlotType_DIJX</th>\n","      <th>PlotType_WLQ6</th>\n","      <th>Architecture_YIK5</th>\n","      <th>Architecture_TP7R</th>\n","      <th>Architecture_I3Z9</th>\n","      <th>Architecture_AVYP</th>\n","      <th>Architecture_1M5X</th>\n","      <th>Region_A</th>\n","      <th>SubModel_FIVQ</th>\n","      <th>SubModel_KT8F</th>\n","      <th>SubModel_UA42</th>\n","      <th>Facade_SILVER</th>\n","      <th>Facade_WHITE</th>\n","      <th>Facade_BLUE</th>\n","      <th>Facade_GREY</th>\n","      <th>Facade_BLACK</th>\n","      <th>Facade_RED</th>\n","      <th>Facade_GOLD</th>\n","      <th>State_TX</th>\n","      <th>State_FL</th>\n","      <th>State_CA</th>\n","      <th>State_NC</th>\n","      <th>State_AZ</th>\n","      <th>State_CO</th>\n","      <th>State_SC</th>\n","      <th>RegionType_A</th>\n","      <th>RegionType_B</th>\n","      <th>RegionType_C</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","      <td>14674.0</td>\n","      <td>8270</td>\n","      <td>13143.0</td>\n","      <td>0</td>\n","      <td>14224.0</td>\n","      <td>9217.0</td>\n","      <td>10387.0</td>\n","      <td>21075</td>\n","      <td>623</td>\n","      <td>13108.0</td>\n","      <td>9022.0</td>\n","      <td>42077</td>\n","      <td>2008</td>\n","      <td>2</td>\n","      <td>10692.0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>4172.0</td>\n","      <td>3890</td>\n","      <td>3461.0</td>\n","      <td>0</td>\n","      <td>4404.0</td>\n","      <td>2958.0</td>\n","      <td>3400.0</td>\n","      <td>73129</td>\n","      <td>1689</td>\n","      <td>3695.0</td>\n","      <td>2742.0</td>\n","      <td>77258</td>\n","      <td>2002</td>\n","      <td>7</td>\n","      <td>3615.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0</td>\n","      <td>8210.0</td>\n","      <td>5900</td>\n","      <td>6922.0</td>\n","      <td>0</td>\n","      <td>8952.0</td>\n","      <td>4568.0</td>\n","      <td>4862.0</td>\n","      <td>29697</td>\n","      <td>2351</td>\n","      <td>7730.0</td>\n","      <td>3768.0</td>\n","      <td>94514</td>\n","      <td>2004</td>\n","      <td>6</td>\n","      <td>5805.0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0</td>\n","      <td>7309.0</td>\n","      <td>7460</td>\n","      <td>6300.0</td>\n","      <td>0</td>\n","      <td>7460.0</td>\n","      <td>5361.0</td>\n","      <td>6305.0</td>\n","      <td>28273</td>\n","      <td>1933</td>\n","      <td>6290.0</td>\n","      <td>5370.0</td>\n","      <td>82302</td>\n","      <td>2002</td>\n","      <td>7</td>\n","      <td>6444.0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0</td>\n","      <td>9182.0</td>\n","      <td>6400</td>\n","      <td>8330.0</td>\n","      <td>0</td>\n","      <td>9846.0</td>\n","      <td>5428.0</td>\n","      <td>5718.0</td>\n","      <td>29697</td>\n","      <td>482</td>\n","      <td>8583.0</td>\n","      <td>4803.0</td>\n","      <td>49176</td>\n","      <td>2007</td>\n","      <td>3</td>\n","      <td>6286.0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   Junk  PriceIndex8  PriceIndex9  ...  RegionType_A  RegionType_B  RegionType_C\n","0     0      14674.0         8270  ...             1             0             0\n","1     1       4172.0         3890  ...             1             0             0\n","2     0       8210.0         5900  ...             1             0             0\n","3     0       7309.0         7460  ...             0             0             1\n","4     0       9182.0         6400  ...             0             1             0\n","\n","[5 rows x 60 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"id":"Tm8G0nt-TpiJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605781243123,"user_tz":-330,"elapsed":1117,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"2a8c5ec8-a16d-4876-c3ff-1c1eda171414"},"source":["print(df.shape)\n","df.dropna(axis=0, inplace=True)\n","print(df.shape)"],"execution_count":28,"outputs":[{"output_type":"stream","text":["(62035, 60)\n","(61763, 60)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2Ex8rVfNWUC6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605781245818,"user_tz":-330,"elapsed":953,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"bd5b3a38-afca-44f6-daa3-0c0bc3d598bc"},"source":["df.shape"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(61763, 60)"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"code","metadata":{"id":"E67wAhFZbfUX","executionInfo":{"status":"ok","timestamp":1605781247393,"user_tz":-330,"elapsed":1412,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["def report(results, n_top=3):\n","    for i in range(1, n_top + 1):\n","        candidates = np.flatnonzero(results['rank_test_score'] == i)\n","        for candidate in candidates:\n","            print(\"Model with rank: {0}\".format(i))\n","            print(\"Mean validation score: {0:.3f} (std: {1:.5f})\".format(\n","                  results['mean_test_score'][candidate],\n","                  results['std_test_score'][candidate]))\n","            print(\"Parameters: {0}\".format(results['params'][candidate]))\n","            print(\"\")\n","\n","\n","\n","def grid_search_wrapper(refit_score='recall_score'):\n","    \"\"\"\n","    fits a GridSearchCV classifier using refit_score for optimization\n","    prints classifier performance metrics\n","    \"\"\"\n","    skf = StratifiedKFold(n_splits=10)\n","    grid_search = GridSearchCV(clf, param_grid, scoring=scorers, refit=refit_score,\n","                           cv=skf, return_train_score=True, n_jobs=-1, verbose=30)\n","    grid_search.fit(xtrain.values, ytrain.values)\n","\n","    # make the predictions\n","    ypred = grid_search.predict(xtest.values)\n","\n","    print('Best params for {}'.format(refit_score))\n","    print(grid_search.best_params_)\n","    #report(grid_search.cv_results_, n_top=3)\n","    # confusion matrix on the test data.\n","    print('\\nConfusion matrix of Random Forest optimized for {} on the test data:'.format(refit_score))\n","    print(pd.DataFrame(confusion_matrix(ytest, ypred),\n","                 columns=['pred_neg', 'pred_pos'], index=['neg', 'pos']))\n","    return grid_search"],"execution_count":30,"outputs":[]},{"cell_type":"code","metadata":{"id":"xQwp9J2IQjNc","executionInfo":{"status":"ok","timestamp":1605785245120,"user_tz":-330,"elapsed":1816,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["xtrain, xtest, ytrain, ytest = train_test_split(df[df.columns[1:]],df.Junk, test_size=0.3, stratify=df.Junk)\n"],"execution_count":31,"outputs":[]},{"cell_type":"code","metadata":{"id":"_Ci8axLCRxQc","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605777800030,"user_tz":-330,"elapsed":6001303,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"68337d12-1fd7-48de-caca-d7df1e51e8b4"},"source":["\n","#deciding Beta value for model :\n","\n","clf = RandomForestClassifier()\n","param_grid = {\n","    \"class_weight\":[{1:0.122,0:0.877}],\n","    \"n_estimators\":[500],\n","    #'min_samples_split': [3, 5, 10], \n","    #'max_depth': [10, 20, 30],\n","    #'max_features': [3, 5, 10, 20]\n","}\n","\n","for beta_val in {0.5,0.7,1,1.5,2,5,10,20,40,70,90}:\n","  scorers = {\n","      'precision_score': make_scorer(precision_score),\n","      'recall_score': make_scorer(recall_score),\n","      'accuracy_score': make_scorer(accuracy_score),\n","      'fbeta_score': make_scorer(fbeta_score, beta=beta_val)\n","  }\n","  print('*********************************************************************************')\n","  print('__________________________________',beta_val,'________________________________________')\n","  grid_search_clf = grid_search_wrapper(refit_score='fbeta_score')\n","  print('*********************************************************************************') \n"],"execution_count":14,"outputs":[{"output_type":"stream","text":["*********************************************************************************\n","__________________________________ 0.5 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16165        92\n","pos      1821       451\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 0.7 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16161        96\n","pos      1825       447\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 2 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16166        91\n","pos      1826       446\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.2min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.7min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.7min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16168        89\n","pos      1831       441\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1.5 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.2min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.7min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.7min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16170        87\n","pos      1828       444\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 5 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.4min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16161        96\n","pos      1834       438\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 70 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.4min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.4min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16164        93\n","pos      1827       445\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 40 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.4min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16170        87\n","pos      1827       445\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 10 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.3min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.3min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.9min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.9min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.5min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.5min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.1min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.1min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16169        88\n","pos      1828       444\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 20 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.3min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.3min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.9min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.9min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.4min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.5min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16170        87\n","pos      1845       427\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 90 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.4min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16169        88\n","pos      1819       453\n","*********************************************************************************\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g-Qo5k-SqRd3","executionInfo":{"status":"ok","timestamp":1605710875910,"user_tz":-330,"elapsed":2706193,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"ad9e639c-d1a5-4002-ff28-f841d570a18f"},"source":["for beta_val in {200,500,1000,1500,2000}:\n","  scorers = {\n","      'precision_score': make_scorer(precision_score),\n","      'recall_score': make_scorer(recall_score),\n","      'accuracy_score': make_scorer(accuracy_score),\n","      'fbeta_score': make_scorer(fbeta_score, beta=beta_val)\n","  }\n","  print('*********************************************************************************')\n","  print('__________________________________',beta_val,'________________________________________')\n","  grid_search_clf = grid_search_wrapper(refit_score='fbeta_score')\n","  print('*********************************************************************************')"],"execution_count":44,"outputs":[{"output_type":"stream","text":["*********************************************************************************\n","__________________________________ 200 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16132       125\n","pos      1817       455\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1000 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16145       112\n","pos      1837       435\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 2000 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16133       124\n","pos      1843       429\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 500 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16144       113\n","pos      1828       444\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1500 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.6min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16132       125\n","pos      1830       442\n","*********************************************************************************\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nGA3Aptg8ZEj","executionInfo":{"status":"ok","timestamp":1605714927583,"user_tz":-330,"elapsed":1771485,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"f80068a6-90d7-40d0-b4bf-eab35bb6f4da"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        #'max_depth': [3, 5, 7, 10,15]\n","        #'min_child_weight': [1, 3, 5],\n","        #'subsample': [0.5, 0.7],\n","        #'colsample_bytree': [0.5, 0.7]\n","}\n","\n","for beta_val in {0,1,100,200,500,1000}:\n","  scorers = {\n","      'precision_score': make_scorer(precision_score),\n","      'recall_score': make_scorer(recall_score),\n","      'accuracy_score': make_scorer(accuracy_score),\n","      'fbeta_score': make_scorer(fbeta_score, beta=beta_val)\n","  }\n","  print('*********************************************************************************')\n","  print('__________________________________',beta_val,'________________________________________')\n","  grid_search_clf = grid_search_wrapper(refit_score='fbeta_score')\n","  print('*********************************************************************************')"],"execution_count":46,"outputs":[{"output_type":"stream","text":["*********************************************************************************\n","__________________________________ 0 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   52.6s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   52.7s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.4min remaining:   51.7s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   55.0s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   55.1s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.5min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.5min remaining:   52.5s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.4min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.4min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 100 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   51.5s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   51.5s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.4min remaining:   51.7s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 200 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   55.1s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   55.3s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.5min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.5min remaining:   52.5s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.4min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.4min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 1000 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   51.7s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   51.8s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.4min remaining:   51.5s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n","*********************************************************************************\n","__________________________________ 500 ________________________________________\n","Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   51.3s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   51.4s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  3.5min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  3.5min remaining:   52.4s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.3min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     15686       571\n","pos      1691       581\n","*********************************************************************************\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"XNTy31JXIsWv","executionInfo":{"status":"ok","timestamp":1605785343775,"user_tz":-330,"elapsed":1181,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["scorers = {\n","     'precision_score': make_scorer(precision_score),\n","     'recall_score': make_scorer(recall_score),\n","     'accuracy_score': make_scorer(accuracy_score),\n","     'fbeta_score': make_scorer(fbeta_score, beta=200),\n","     'roc_auc_score':make_scorer(roc_auc_score)\n","}\n","from sklearn.model_selection import RandomizedSearchCV\n","from sklearn.tree import DecisionTreeClassifier\n","from sklearn.ensemble import RandomForestClassifier\n","\n"],"execution_count":32,"outputs":[]},{"cell_type":"code","metadata":{"id":"8e_zLiZ7SJec","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605694532969,"user_tz":-330,"elapsed":556557,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"a30eeca4-18e9-4ba5-c6aa-b2fe306217c5"},"source":["clf = RandomForestClassifier()\n","param_grid = {\n","    \"class_weight\":[{1:0.122,0:0.877}],\n","    \"n_estimators\":[500],\n","    #'min_samples_split': [3, 5, 10], \n","    #'max_depth': [3, 5, 15, 25],\n","    #'max_features': [3, 5, 10, 20]\n","}\n","grid_search_clf = grid_search_wrapper(refit_score='precision_score')\n"],"execution_count":20,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.2min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.7min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.3min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.3min remaining:  1.6min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  8.0min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for precision_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for precision_score on the test data:\n","     pred_neg  pred_pos\n","neg     16133       124\n","pos      1817       455\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"XpOUHlstjwLk","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605212212690,"user_tz":-330,"elapsed":534979,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"791d7d1b-4e84-44f6-e21b-6cb61812570e"},"source":["clf = RandomForestClassifier()\n","param_grid = {\n","    \"class_weight\":[{1:0.122,0:0.877}],\n","    \"n_estimators\":[500],\n","    #'min_samples_split': [3, 5, 10], \n","    #'max_depth': [3, 5, 15, 25],\n","    #'max_features': [3, 5, 10, 20]\n","}\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 1 candidates, totalling 10 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.1min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  6.2min\n","[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed:  6.2min remaining:  1.5min\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  7.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     16140       117\n","pos      1827       445\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"w5O3F2R40i5F","executionInfo":{"status":"ok","timestamp":1605696690879,"user_tz":-330,"elapsed":1137778,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"7cfb9ec9-29e3-47cf-d098-82cce68ae4eb"},"source":["clf = RandomForestClassifier()\n","param_grid = {\n","    \"class_weight\":[{1:0.122,0:0.877}],\n","    \"n_estimators\":[500],\n","    #'min_samples_split': [3, 5, 10], \n","    'max_depth': [3, 5, 15, 25],\n","    #'max_features': [3, 5, 10, 20]\n","}\n","grid_search_clf = grid_search_wrapper(refit_score='fbeta_score')"],"execution_count":30,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 4 candidates, totalling 40 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   22.6s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   22.6s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   44.3s\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   44.4s\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.1min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  1.1min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.8min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  2.3min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  2.4min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  2.8min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  2.9min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  3.9min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  3.9min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  5.5min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed:  5.5min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed:  6.7min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  6.7min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.8min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:  7.9min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed:  9.0min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:  9.1min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 10.1min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 10.2min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 11.6min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 11.7min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 13.2min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 13.3min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 14.8min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 16.2min\n","[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed: 17.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for fbeta_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'max_depth': 25, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for fbeta_score on the test data:\n","     pred_neg  pred_pos\n","neg     16183        74\n","pos      1887       385\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"jyggNhpqMfbX","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605785362102,"user_tz":-330,"elapsed":3586,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"df349079-258b-478e-f491-0f694b6da7c7"},"source":["!pip install rfpimp\n","from rfpimp import *\n"],"execution_count":33,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: rfpimp in /usr/local/lib/python3.6/dist-packages (1.3.5)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from rfpimp) (0.22.2.post1)\n","Requirement already satisfied: stratx>=0.2 in /usr/local/lib/python3.6/dist-packages (from rfpimp) (0.5)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from rfpimp) (1.1.4)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from rfpimp) (3.2.2)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from rfpimp) (1.18.5)\n","Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->rfpimp) (1.4.1)\n","Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->rfpimp) (0.17.0)\n","Requirement already satisfied: colour in /usr/local/lib/python3.6/dist-packages (from stratx>=0.2->rfpimp) (0.1.5)\n","Requirement already satisfied: numba in /usr/local/lib/python3.6/dist-packages (from stratx>=0.2->rfpimp) (0.48.0)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->rfpimp) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->rfpimp) (2018.9)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->rfpimp) (0.10.0)\n","Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->rfpimp) (2.4.7)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->rfpimp) (1.3.1)\n","Requirement already satisfied: llvmlite<0.32.0,>=0.31.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba->stratx>=0.2->rfpimp) (0.31.0)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from numba->stratx>=0.2->rfpimp) (50.3.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas->rfpimp) (1.15.0)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"kdauR-ReqaQm","executionInfo":{"status":"ok","timestamp":1605785529984,"user_tz":-330,"elapsed":855,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["\n","# adding a column in X with random values to help find out correct feature importance and features to be removed.\n","xtrain['random']=np.random.choice(range(xtrain.shape[0]), xtrain.shape[0])\n","xtest['random']=np.random.choice(range(xtrain.shape[0]), xtest.shape[0])"],"execution_count":35,"outputs":[]},{"cell_type":"code","metadata":{"id":"yip1MQbiRPTb","executionInfo":{"status":"ok","timestamp":1605785534332,"user_tz":-330,"elapsed":873,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","rf=XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=3,\n","              min_child_weight=1, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=1, verbosity=1)"],"execution_count":36,"outputs":[]},{"cell_type":"code","metadata":{"id":"hyylyR-Euo42","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605785538903,"user_tz":-330,"elapsed":848,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"6b5aaa38-f578-422a-f50c-ca212b647b8f"},"source":["xtest.shape"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(18529, 60)"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"67OOYvjFmjEU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605785577732,"user_tz":-330,"elapsed":38402,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"8a05397b-16ac-4803-e45c-1f4bab7dceb7"},"source":["rf.fit(xtrain, ytrain)"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=3,\n","              min_child_weight=1, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=1, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"id":"ZSMLkvT7zRfd","executionInfo":{"status":"ok","timestamp":1605785625185,"user_tz":-330,"elapsed":85459,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["list(zip(df.columns,rf.feature_importances_))\n","imp = importances(rf, xtrain, ytrain, n_samples=-1) # this function comes from package rfpimp\n","viz = plot_importances(imp)\n","viz.view()"],"execution_count":39,"outputs":[]},{"cell_type":"code","metadata":{"id":"KMufoOmatljg","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1605785625189,"user_tz":-330,"elapsed":80949,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"08601bc5-14cc-4137-b103-96184e44346c"},"source":["imp"],"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Importance</th>\n","    </tr>\n","    <tr>\n","      <th>Feature</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>PriceIndex7</th>\n","      <td>0.133298</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex6</th>\n","      <td>0.089860</td>\n","    </tr>\n","    <tr>\n","      <th>AreaIncomeType_B</th>\n","      <td>0.087801</td>\n","    </tr>\n","    <tr>\n","      <th>AreaIncomeType_A</th>\n","      <td>0.081140</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex1</th>\n","      <td>0.059837</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex8</th>\n","      <td>0.050747</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex5</th>\n","      <td>0.048804</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex3</th>\n","      <td>0.042374</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex4</th>\n","      <td>0.040177</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex2</th>\n","      <td>0.035435</td>\n","    </tr>\n","    <tr>\n","      <th>PriceIndex9</th>\n","      <td>0.029606</td>\n","    </tr>\n","    <tr>\n","      <th>Zip</th>\n","      <td>0.022968</td>\n","    </tr>\n","    <tr>\n","      <th>NormalisedPopulation</th>\n","      <td>0.021025</td>\n","    </tr>\n","    <tr>\n","      <th>random</th>\n","      <td>0.018365</td>\n","    </tr>\n","    <tr>\n","      <th>InsurancePremiumIndex</th>\n","      <td>0.015243</td>\n","    </tr>\n","    <tr>\n","      <th>PropertyAge</th>\n","      <td>0.010940</td>\n","    </tr>\n","    <tr>\n","      <th>State_CA</th>\n","      <td>0.010617</td>\n","    </tr>\n","    <tr>\n","      <th>Channel_Direct</th>\n","      <td>0.006407</td>\n","    </tr>\n","    <tr>\n","      <th>BuildYear</th>\n","      <td>0.004163</td>\n","    </tr>\n","    <tr>\n","      <th>State_TX</th>\n","      <td>0.001804</td>\n","    </tr>\n","    <tr>\n","      <th>Channel_Agent</th>\n","      <td>0.001596</td>\n","    </tr>\n","    <tr>\n","      <th>PlotType_32T2</th>\n","      <td>0.001411</td>\n","    </tr>\n","    <tr>\n","      <th>Agency_CAT3</th>\n","      <td>0.001342</td>\n","    </tr>\n","    <tr>\n","      <th>State_AZ</th>\n","      <td>0.001295</td>\n","    </tr>\n","    <tr>\n","      <th>ExpeditedListing</th>\n","      <td>0.001203</td>\n","    </tr>\n","    <tr>\n","      <th>Agency_CAT2</th>\n","      <td>0.001156</td>\n","    </tr>\n","    <tr>\n","      <th>Agency_CAT1</th>\n","      <td>0.001018</td>\n","    </tr>\n","    <tr>\n","      <th>Channel_Other</th>\n","      <td>0.000948</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_SILVER</th>\n","      <td>0.000902</td>\n","    </tr>\n","    <tr>\n","      <th>PlotType_WLQ6</th>\n","      <td>0.000879</td>\n","    </tr>\n","    <tr>\n","      <th>Architecture_YIK5</th>\n","      <td>0.000833</td>\n","    </tr>\n","    <tr>\n","      <th>Material_C8A4</th>\n","      <td>0.000786</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_GREY</th>\n","      <td>0.000786</td>\n","    </tr>\n","    <tr>\n","      <th>Region_A</th>\n","      <td>0.000763</td>\n","    </tr>\n","    <tr>\n","      <th>Architecture_AVYP</th>\n","      <td>0.000717</td>\n","    </tr>\n","    <tr>\n","      <th>Architecture_TP7R</th>\n","      <td>0.000694</td>\n","    </tr>\n","    <tr>\n","      <th>Material_PRN0</th>\n","      <td>0.000694</td>\n","    </tr>\n","    <tr>\n","      <th>Material_NYFD</th>\n","      <td>0.000648</td>\n","    </tr>\n","    <tr>\n","      <th>Material_ZCI3</th>\n","      <td>0.000625</td>\n","    </tr>\n","    <tr>\n","      <th>PlotType_WHBI</th>\n","      <td>0.000601</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_BLUE</th>\n","      <td>0.000555</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_WHITE</th>\n","      <td>0.000532</td>\n","    </tr>\n","    <tr>\n","      <th>SubModel_FIVQ</th>\n","      <td>0.000486</td>\n","    </tr>\n","    <tr>\n","      <th>PlotType_W62B</th>\n","      <td>0.000463</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_GOLD</th>\n","      <td>0.000439</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_RED</th>\n","      <td>0.000416</td>\n","    </tr>\n","    <tr>\n","      <th>State_NC</th>\n","      <td>0.000393</td>\n","    </tr>\n","    <tr>\n","      <th>RegionType_B</th>\n","      <td>0.000393</td>\n","    </tr>\n","    <tr>\n","      <th>Architecture_I3Z9</th>\n","      <td>0.000347</td>\n","    </tr>\n","    <tr>\n","      <th>State_SC</th>\n","      <td>0.000278</td>\n","    </tr>\n","    <tr>\n","      <th>RegionType_C</th>\n","      <td>0.000231</td>\n","    </tr>\n","    <tr>\n","      <th>PlotType_DIJX</th>\n","      <td>0.000231</td>\n","    </tr>\n","    <tr>\n","      <th>State_CO</th>\n","      <td>0.000162</td>\n","    </tr>\n","    <tr>\n","      <th>State_FL</th>\n","      <td>0.000162</td>\n","    </tr>\n","    <tr>\n","      <th>Architecture_1M5X</th>\n","      <td>0.000116</td>\n","    </tr>\n","    <tr>\n","      <th>Agency_OTHER</th>\n","      <td>0.000093</td>\n","    </tr>\n","    <tr>\n","      <th>SubModel_UA42</th>\n","      <td>0.000023</td>\n","    </tr>\n","    <tr>\n","      <th>RegionType_A</th>\n","      <td>0.000000</td>\n","    </tr>\n","    <tr>\n","      <th>Facade_BLACK</th>\n","      <td>-0.000069</td>\n","    </tr>\n","    <tr>\n","      <th>SubModel_KT8F</th>\n","      <td>-0.000370</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                       Importance\n","Feature                          \n","PriceIndex7              0.133298\n","PriceIndex6              0.089860\n","AreaIncomeType_B         0.087801\n","AreaIncomeType_A         0.081140\n","PriceIndex1              0.059837\n","PriceIndex8              0.050747\n","PriceIndex5              0.048804\n","PriceIndex3              0.042374\n","PriceIndex4              0.040177\n","PriceIndex2              0.035435\n","PriceIndex9              0.029606\n","Zip                      0.022968\n","NormalisedPopulation     0.021025\n","random                   0.018365\n","InsurancePremiumIndex    0.015243\n","PropertyAge              0.010940\n","State_CA                 0.010617\n","Channel_Direct           0.006407\n","BuildYear                0.004163\n","State_TX                 0.001804\n","Channel_Agent            0.001596\n","PlotType_32T2            0.001411\n","Agency_CAT3              0.001342\n","State_AZ                 0.001295\n","ExpeditedListing         0.001203\n","Agency_CAT2              0.001156\n","Agency_CAT1              0.001018\n","Channel_Other            0.000948\n","Facade_SILVER            0.000902\n","PlotType_WLQ6            0.000879\n","Architecture_YIK5        0.000833\n","Material_C8A4            0.000786\n","Facade_GREY              0.000786\n","Region_A                 0.000763\n","Architecture_AVYP        0.000717\n","Architecture_TP7R        0.000694\n","Material_PRN0            0.000694\n","Material_NYFD            0.000648\n","Material_ZCI3            0.000625\n","PlotType_WHBI            0.000601\n","Facade_BLUE              0.000555\n","Facade_WHITE             0.000532\n","SubModel_FIVQ            0.000486\n","PlotType_W62B            0.000463\n","Facade_GOLD              0.000439\n","Facade_RED               0.000416\n","State_NC                 0.000393\n","RegionType_B             0.000393\n","Architecture_I3Z9        0.000347\n","State_SC                 0.000278\n","RegionType_C             0.000231\n","PlotType_DIJX            0.000231\n","State_CO                 0.000162\n","State_FL                 0.000162\n","Architecture_1M5X        0.000116\n","Agency_OTHER             0.000093\n","SubModel_UA42            0.000023\n","RegionType_A             0.000000\n","Facade_BLACK            -0.000069\n","SubModel_KT8F           -0.000370"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"code","metadata":{"id":"48j5EuDTuHwJ","colab":{"base_uri":"https://localhost:8080/","height":443},"executionInfo":{"status":"ok","timestamp":1605785625193,"user_tz":-330,"elapsed":68399,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"f84bb7d8-d5ba-47db-e003-a9d1eb234a69"},"source":["indx= imp[imp.values > 0.018943].index.values\n","xtrain[indx]"],"execution_count":41,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>PriceIndex7</th>\n","      <th>PriceIndex6</th>\n","      <th>AreaIncomeType_B</th>\n","      <th>AreaIncomeType_A</th>\n","      <th>PriceIndex1</th>\n","      <th>PriceIndex8</th>\n","      <th>PriceIndex5</th>\n","      <th>PriceIndex3</th>\n","      <th>PriceIndex4</th>\n","      <th>PriceIndex2</th>\n","      <th>PriceIndex9</th>\n","      <th>Zip</th>\n","      <th>NormalisedPopulation</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>61239</th>\n","      <td>4434.0</td>\n","      <td>4910.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>3643.0</td>\n","      <td>5803.0</td>\n","      <td>3643.0</td>\n","      <td>4434.0</td>\n","      <td>5803.0</td>\n","      <td>4910.0</td>\n","      <td>4175</td>\n","      <td>87105</td>\n","      <td>84876</td>\n","    </tr>\n","    <tr>\n","      <th>22896</th>\n","      <td>4038.0</td>\n","      <td>4105.0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>3276.0</td>\n","      <td>4933.0</td>\n","      <td>3276.0</td>\n","      <td>4038.0</td>\n","      <td>4933.0</td>\n","      <td>4105.0</td>\n","      <td>5180</td>\n","      <td>77041</td>\n","      <td>58119</td>\n","    </tr>\n","    <tr>\n","      <th>21770</th>\n","      <td>5964.0</td>\n","      <td>3984.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>2200.0</td>\n","      <td>7078.0</td>\n","      <td>3197.0</td>\n","      <td>4798.0</td>\n","      <td>6334.0</td>\n","      <td>3313.0</td>\n","      <td>4450</td>\n","      <td>29532</td>\n","      <td>108042</td>\n","    </tr>\n","    <tr>\n","      <th>2859</th>\n","      <td>9753.0</td>\n","      <td>9325.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>8619.0</td>\n","      <td>10571.0</td>\n","      <td>8568.0</td>\n","      <td>9809.0</td>\n","      <td>10623.0</td>\n","      <td>9373.0</td>\n","      <td>8300</td>\n","      <td>74135</td>\n","      <td>45610</td>\n","    </tr>\n","    <tr>\n","      <th>22635</th>\n","      <td>3633.0</td>\n","      <td>3778.0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>3514.0</td>\n","      <td>4580.0</td>\n","      <td>2901.0</td>\n","      <td>4295.0</td>\n","      <td>5527.0</td>\n","      <td>4655.0</td>\n","      <td>5155</td>\n","      <td>80229</td>\n","      <td>81733</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>30794</th>\n","      <td>12546.0</td>\n","      <td>9471.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>8586.0</td>\n","      <td>13669.0</td>\n","      <td>8411.0</td>\n","      <td>11953.0</td>\n","      <td>13553.0</td>\n","      <td>9735.0</td>\n","      <td>8435</td>\n","      <td>73108</td>\n","      <td>46078</td>\n","    </tr>\n","    <tr>\n","      <th>52165</th>\n","      <td>4615.0</td>\n","      <td>4944.0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>3810.0</td>\n","      <td>5840.0</td>\n","      <td>3810.0</td>\n","      <td>4615.0</td>\n","      <td>5840.0</td>\n","      <td>4944.0</td>\n","      <td>4415</td>\n","      <td>28273</td>\n","      <td>70504</td>\n","    </tr>\n","    <tr>\n","      <th>49788</th>\n","      <td>8597.0</td>\n","      <td>7292.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>3652.0</td>\n","      <td>10372.0</td>\n","      <td>5525.0</td>\n","      <td>7504.0</td>\n","      <td>8356.0</td>\n","      <td>4857.0</td>\n","      <td>4100</td>\n","      <td>80022</td>\n","      <td>53109</td>\n","    </tr>\n","    <tr>\n","      <th>11522</th>\n","      <td>8879.0</td>\n","      <td>7922.0</td>\n","      <td>0</td>\n","      <td>1</td>\n","      <td>6743.0</td>\n","      <td>9606.0</td>\n","      <td>6167.0</td>\n","      <td>9401.0</td>\n","      <td>10349.0</td>\n","      <td>7696.0</td>\n","      <td>7730</td>\n","      <td>80022</td>\n","      <td>79501</td>\n","    </tr>\n","    <tr>\n","      <th>55964</th>\n","      <td>10103.0</td>\n","      <td>7670.0</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>7308.0</td>\n","      <td>12064.0</td>\n","      <td>6411.0</td>\n","      <td>8393.0</td>\n","      <td>10029.0</td>\n","      <td>8823.0</td>\n","      <td>7195</td>\n","      <td>20166</td>\n","      <td>74014</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>43234 rows × 13 columns</p>\n","</div>"],"text/plain":["       PriceIndex7  PriceIndex6  ...    Zip  NormalisedPopulation\n","61239       4434.0       4910.0  ...  87105                 84876\n","22896       4038.0       4105.0  ...  77041                 58119\n","21770       5964.0       3984.0  ...  29532                108042\n","2859        9753.0       9325.0  ...  74135                 45610\n","22635       3633.0       3778.0  ...  80229                 81733\n","...            ...          ...  ...    ...                   ...\n","30794      12546.0       9471.0  ...  73108                 46078\n","52165       4615.0       4944.0  ...  28273                 70504\n","49788       8597.0       7292.0  ...  80022                 53109\n","11522       8879.0       7922.0  ...  80022                 79501\n","55964      10103.0       7670.0  ...  20166                 74014\n","\n","[43234 rows x 13 columns]"]},"metadata":{"tags":[]},"execution_count":41}]},{"cell_type":"code","metadata":{"id":"2KY209RMo9au","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605127919857,"user_tz":-330,"elapsed":1334568,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"9a4525a3-f942-4e30-838c-f5097476bac3"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [0.01, 0.05,0.1,0.25, 0.5, 0.7],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500]\n","        #'max_depth': [3, 5, 7, 10],\n","        #'min_child_weight': [1, 3, 5],\n","        #'subsample': [0.5, 0.7],\n","        #'colsample_bytree': [0.5, 0.7]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   45.3s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   45.4s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.2min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.2min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  2.9min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  2.9min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  3.7min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  3.7min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  5.1min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  5.1min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  5.8min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  5.8min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  6.5min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  6.5min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed:  7.3min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:  7.3min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed:  8.7min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  8.7min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  9.4min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:  9.4min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 10.1min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 10.1min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 10.8min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 10.9min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 11.6min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 11.6min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 12.3min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 12.3min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 13.0min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 13.0min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 13.7min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 13.7min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 14.4min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 14.4min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 15.1min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 15.1min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 15.9min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 15.9min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 16.6min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 16.6min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 17.3min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 17.3min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 18.0min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 18.0min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 18.7min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 18.7min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 19.4min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 19.4min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 20.1min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 20.1min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 20.9min\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 21.6min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 21.6min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.7, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     15808       449\n","pos      1705       567\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.7, max_delta_step=0, max_depth=3,\n","              min_child_weight=1, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=1, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"code","metadata":{"id":"Ws0xsVzI1U_k","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605136619634,"user_tz":-330,"elapsed":8532453,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"742f917b-ad25-4f88-9376-d30a7358d1b6"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.7,0.9,0.8],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [3, 5, 7, 10,15]\n","        #'min_child_weight': [1, 3, 5],\n","        #'subsample': [0.5, 0.7],\n","        #'colsample_bytree': [0.5, 0.7]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 15 candidates, totalling 150 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   42.9s\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   42.9s\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.4min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.4min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.2min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.2min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  2.9min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  2.9min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  3.6min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  3.6min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  4.8min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  6.0min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  6.0min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  7.2min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  7.2min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  8.4min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  8.4min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed:  9.6min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:  9.6min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 11.3min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 11.3min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 13.0min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 13.0min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 16.4min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 16.4min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 18.1min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 18.1min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 20.6min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 20.6min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 23.0min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 23.1min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 25.5min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 25.5min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 27.9min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 28.0min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 30.4min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 30.5min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 33.7min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 33.9min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 37.1min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 37.3min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 40.5min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 40.7min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 44.0min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 44.1min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 47.5min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 47.6min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 48.2min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 48.3min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 49.0min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 49.1min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 49.7min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 49.8min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 50.4min\n","[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed: 50.6min\n","[Parallel(n_jobs=-1)]: Done  59 tasks      | elapsed: 51.2min\n","[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed: 51.3min\n","[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed: 52.4min\n","[Parallel(n_jobs=-1)]: Done  62 tasks      | elapsed: 52.5min\n","[Parallel(n_jobs=-1)]: Done  63 tasks      | elapsed: 53.6min\n","[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed: 53.8min\n","[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed: 54.9min\n","[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed: 55.0min\n","[Parallel(n_jobs=-1)]: Done  67 tasks      | elapsed: 56.1min\n","[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed: 56.2min\n","[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed: 57.3min\n","[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed: 57.4min\n","[Parallel(n_jobs=-1)]: Done  71 tasks      | elapsed: 59.1min\n","[Parallel(n_jobs=-1)]: Done  72 tasks      | elapsed: 59.2min\n","[Parallel(n_jobs=-1)]: Done  73 tasks      | elapsed: 60.8min\n","[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed: 60.9min\n","[Parallel(n_jobs=-1)]: Done  75 tasks      | elapsed: 62.5min\n","[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed: 62.7min\n","[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed: 64.3min\n","[Parallel(n_jobs=-1)]: Done  78 tasks      | elapsed: 64.4min\n","[Parallel(n_jobs=-1)]: Done  79 tasks      | elapsed: 66.0min\n","[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed: 66.2min\n","[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed: 68.5min\n","[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed: 68.7min\n","[Parallel(n_jobs=-1)]: Done  83 tasks      | elapsed: 71.0min\n","[Parallel(n_jobs=-1)]: Done  84 tasks      | elapsed: 71.2min\n","[Parallel(n_jobs=-1)]: Done  85 tasks      | elapsed: 73.5min\n","[Parallel(n_jobs=-1)]: Done  86 tasks      | elapsed: 73.7min\n","[Parallel(n_jobs=-1)]: Done  87 tasks      | elapsed: 76.0min\n","[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed: 76.2min\n","[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed: 78.5min\n","[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed: 78.7min\n","[Parallel(n_jobs=-1)]: Done  91 tasks      | elapsed: 81.7min\n","[Parallel(n_jobs=-1)]: Done  92 tasks      | elapsed: 81.9min\n","[Parallel(n_jobs=-1)]: Done  93 tasks      | elapsed: 84.8min\n","[Parallel(n_jobs=-1)]: Done  94 tasks      | elapsed: 85.0min\n","[Parallel(n_jobs=-1)]: Done  95 tasks      | elapsed: 88.0min\n","[Parallel(n_jobs=-1)]: Done  96 tasks      | elapsed: 88.2min\n","[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed: 91.2min\n","[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed: 91.4min\n","[Parallel(n_jobs=-1)]: Done  99 tasks      | elapsed: 94.3min\n","[Parallel(n_jobs=-1)]: Done 100 tasks      | elapsed: 94.6min\n","[Parallel(n_jobs=-1)]: Done 101 tasks      | elapsed: 95.1min\n","[Parallel(n_jobs=-1)]: Done 102 tasks      | elapsed: 95.3min\n","[Parallel(n_jobs=-1)]: Done 103 tasks      | elapsed: 95.8min\n","[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed: 96.0min\n","[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed: 96.5min\n","[Parallel(n_jobs=-1)]: Done 106 tasks      | elapsed: 96.8min\n","[Parallel(n_jobs=-1)]: Done 107 tasks      | elapsed: 97.2min\n","[Parallel(n_jobs=-1)]: Done 108 tasks      | elapsed: 97.5min\n","[Parallel(n_jobs=-1)]: Done 109 tasks      | elapsed: 98.0min\n","[Parallel(n_jobs=-1)]: Done 110 tasks      | elapsed: 98.2min\n","[Parallel(n_jobs=-1)]: Done 111 tasks      | elapsed: 99.2min\n","[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed: 99.4min\n","[Parallel(n_jobs=-1)]: Done 113 tasks      | elapsed: 100.4min\n","[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed: 100.6min\n","[Parallel(n_jobs=-1)]: Done 115 tasks      | elapsed: 101.6min\n","[Parallel(n_jobs=-1)]: Done 116 tasks      | elapsed: 101.8min\n","[Parallel(n_jobs=-1)]: Done 117 tasks      | elapsed: 102.8min\n","[Parallel(n_jobs=-1)]: Done 118 tasks      | elapsed: 103.0min\n","[Parallel(n_jobs=-1)]: Done 119 tasks      | elapsed: 104.0min\n","[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed: 104.2min\n","[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed: 105.6min\n","[Parallel(n_jobs=-1)]: Done 122 tasks      | elapsed: 105.9min\n","[Parallel(n_jobs=-1)]: Done 123 tasks      | elapsed: 107.3min\n","[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed: 107.6min\n","[Parallel(n_jobs=-1)]: Done 125 tasks      | elapsed: 109.1min\n","[Parallel(n_jobs=-1)]: Done 126 tasks      | elapsed: 109.3min\n","[Parallel(n_jobs=-1)]: Done 127 tasks      | elapsed: 110.8min\n","[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed: 111.0min\n","[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed: 112.4min\n","[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed: 112.7min\n","[Parallel(n_jobs=-1)]: Done 131 tasks      | elapsed: 114.9min\n","[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed: 115.2min\n","[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed: 117.3min\n","[Parallel(n_jobs=-1)]: Done 134 tasks      | elapsed: 117.6min\n","[Parallel(n_jobs=-1)]: Done 135 tasks      | elapsed: 119.8min\n","[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed: 120.1min\n","[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed: 122.2min\n","[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 122.5min\n","[Parallel(n_jobs=-1)]: Done 139 tasks      | elapsed: 124.7min\n","[Parallel(n_jobs=-1)]: Done 140 tasks      | elapsed: 125.0min\n","[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed: 127.9min\n","[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed: 128.2min\n","[Parallel(n_jobs=-1)]: Done 143 tasks      | elapsed: 131.2min\n","[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed: 131.5min\n","[Parallel(n_jobs=-1)]: Done 145 tasks      | elapsed: 134.4min\n","[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 134.7min\n","[Parallel(n_jobs=-1)]: Done 147 tasks      | elapsed: 137.7min\n","[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 141.1min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     15631       626\n","pos      1673       599\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=1, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=1, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":41}]},{"cell_type":"code","metadata":{"id":"UcosObcqCVLL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605216837249,"user_tz":-330,"elapsed":4624389,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"aa68ea90-fa18-42a2-b5b2-b1c7bde401e3"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [5],\n","        'min_child_weight': [1, 3, 5,10,15,50,100,75,125]\n","        #'subsample': [0.5, 0.7],\n","        #'colsample_bytree': [0.5, 0.7]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 9 candidates, totalling 90 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.5min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  3.0min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.0min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.4min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  5.9min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  5.9min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  7.3min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  7.3min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  8.7min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  8.8min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed: 10.2min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed: 10.2min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed: 11.6min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed: 11.6min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 13.0min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 13.1min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed: 14.5min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed: 14.5min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 15.9min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 15.9min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 17.3min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 17.3min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 18.7min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 18.8min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 21.6min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 21.7min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 23.0min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 23.1min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 24.4min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 24.5min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 25.8min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 25.9min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 27.2min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 27.3min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 28.7min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 28.8min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 30.1min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 30.2min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 31.5min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 31.6min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 32.9min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 33.0min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 34.3min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 34.4min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 35.7min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 35.9min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 37.1min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 37.3min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 38.6min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 38.7min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 40.0min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 40.1min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 41.4min\n","[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed: 41.5min\n","[Parallel(n_jobs=-1)]: Done  59 tasks      | elapsed: 42.8min\n","[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed: 42.9min\n","[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed: 44.4min\n","[Parallel(n_jobs=-1)]: Done  62 tasks      | elapsed: 44.5min\n","[Parallel(n_jobs=-1)]: Done  63 tasks      | elapsed: 46.1min\n","[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed: 46.3min\n","[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed: 47.7min\n","[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed: 47.9min\n","[Parallel(n_jobs=-1)]: Done  67 tasks      | elapsed: 49.3min\n","[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed: 49.5min\n","[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed: 51.3min\n","[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed: 51.5min\n","[Parallel(n_jobs=-1)]: Done  71 tasks      | elapsed: 53.0min\n","[Parallel(n_jobs=-1)]: Done  72 tasks      | elapsed: 53.2min\n","[Parallel(n_jobs=-1)]: Done  73 tasks      | elapsed: 54.5min\n","[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed: 54.6min\n","[Parallel(n_jobs=-1)]: Done  75 tasks      | elapsed: 55.9min\n","[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed: 56.1min\n","[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed: 57.6min\n","[Parallel(n_jobs=-1)]: Done  78 tasks      | elapsed: 57.8min\n","[Parallel(n_jobs=-1)]: Done  79 tasks      | elapsed: 59.3min\n","[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed: 59.5min\n","[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed: 60.8min\n","[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed: 61.0min\n","[Parallel(n_jobs=-1)]: Done  83 tasks      | elapsed: 62.4min\n","[Parallel(n_jobs=-1)]: Done  84 tasks      | elapsed: 62.7min\n","[Parallel(n_jobs=-1)]: Done  85 tasks      | elapsed: 64.2min\n","[Parallel(n_jobs=-1)]: Done  86 tasks      | elapsed: 64.5min\n","[Parallel(n_jobs=-1)]: Done  87 tasks      | elapsed: 65.9min\n","[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed: 67.5min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     15612       645\n","pos      1647       625\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=1, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"code","metadata":{"id":"llg5FJjVhYeb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605220035098,"user_tz":-330,"elapsed":2874303,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"14766729-0900-4eae-b54f-ef84293c78df"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [5],\n","        'min_child_weight': [10],\n","        'subsample': [0.2,0.3,0.5,0.6, 0.7,0.9],\n","        #'colsample_bytree': [0.5, 0.7]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.2min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.2min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  2.3min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.3min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  3.4min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  4.6min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  5.7min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  5.7min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  7.1min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  7.1min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  8.4min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  8.5min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  9.8min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  9.8min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 11.2min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 11.2min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed: 12.5min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed: 12.6min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 14.3min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 14.4min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 16.0min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 16.1min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 17.7min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 17.8min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 19.4min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 19.5min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 21.1min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 21.3min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 22.9min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 23.0min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 24.7min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 24.8min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 26.4min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 26.6min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 28.2min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 28.3min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 29.9min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 30.1min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 31.6min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 31.8min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 33.3min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 33.5min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 35.0min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 35.3min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 36.7min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 37.0min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 38.4min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 38.7min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 39.9min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 40.2min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 41.5min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 41.8min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 43.0min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 43.3min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 44.6min\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 46.4min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 46.4min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500, 'subsample': 0.3}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     14653      1604\n","pos      1641       631\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n","              silent=None, subsample=0.3, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"code","metadata":{"id":"coxgSVuJj3td","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605222968151,"user_tz":-330,"elapsed":2536575,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"8216e3b1-2fe8-49c6-c7df-7ac70ef22e4f"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [5],\n","        'min_child_weight': [10],\n","        'subsample': [0.3],\n","        'scale_pos_weight': [0.2,0.3,0.5,0.6, 0.7,0.9]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.3min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.3min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.6min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  3.9min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.0min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  6.6min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  6.6min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed: 10.6min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed: 10.7min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed: 13.3min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed: 13.3min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 16.0min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 16.1min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 17.5min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 17.5min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 18.8min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 18.8min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 21.5min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 21.6min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 22.9min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 22.9min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 24.3min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 24.3min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 25.6min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 25.7min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 27.1min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 27.1min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 28.4min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 28.5min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 29.8min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 29.9min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 31.2min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 31.2min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 32.5min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 32.6min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 33.9min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 34.0min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 35.3min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 35.3min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 36.6min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 36.7min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 38.1min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 38.1min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 39.5min\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500, 'scale_pos_weight': 0.9, 'subsample': 0.3}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     14716      1541\n","pos      1609       663\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.9, seed=None,\n","              silent=None, subsample=0.3, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"id":"hv7ysLIZkpwR","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605720932857,"user_tz":-330,"elapsed":2543185,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"d6f79117-774b-4f1e-fcf7-1e2ea69144ff"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [5],\n","        'min_child_weight': [10],\n","        'subsample': [0.3],\n","        'scale_pos_weight': [0.2,0.3,0.5,0.6, 0.7,0.9]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='recall_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","gsf.fit(xtrain[indx],ytrain)"],"execution_count":59,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.3min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.3min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  2.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.0min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.0min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  6.7min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  6.7min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed: 10.7min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed: 10.7min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed: 13.4min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed: 13.4min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 14.8min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 14.8min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 16.2min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 16.2min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 17.5min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 17.5min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 18.9min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 18.9min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 20.2min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 21.6min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 21.6min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 23.0min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 23.0min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 24.4min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 24.4min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 25.8min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 25.8min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 27.1min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 27.1min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 28.5min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 28.5min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 29.8min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 29.9min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 31.2min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 31.3min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 32.6min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 32.7min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 34.0min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 34.1min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 35.4min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 35.5min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 36.8min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 36.8min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 38.2min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 38.2min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 39.5min\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.9min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.9min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for recall_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500, 'scale_pos_weight': 0.9, 'subsample': 0.3}\n","\n","Confusion matrix of Random Forest optimized for recall_score on the test data:\n","     pred_neg  pred_pos\n","neg     14661      1596\n","pos      1655       617\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.9, seed=None,\n","              silent=None, subsample=0.3, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"code","metadata":{"id":"EJvAkPLtrj07","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605723438223,"user_tz":-330,"elapsed":2505328,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"80563a8b-4371-4a6a-c69d-2ba38357cc21"},"source":["from xgboost import XGBClassifier\n","clf = XGBClassifier()\n","\n","param_grid = {\n","        'learning_rate': [ 0.9],\n","        'class_weight': [{1: 0.122, 0: 0.877}], 'n_estimators': [500],\n","        'max_depth': [5],\n","        'min_child_weight': [10],\n","        'subsample': [0.3],\n","        'scale_pos_weight': [0.8,0.9,1,9]\n","}\n","\n","grid_search_clf = grid_search_wrapper(refit_score='roc_auc_score')\n","\n","gsf= grid_search_clf.best_estimator_\n","\n"],"execution_count":60,"outputs":[{"output_type":"stream","text":["Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n","[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.4min\n","[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.4min\n","[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  2.7min\n","[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.7min\n","[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  4.0min\n","[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  4.0min\n","[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  5.3min\n","[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  6.6min\n","[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  6.6min\n","[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  7.9min\n","[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  8.0min\n","[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  9.3min\n","[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed: 10.7min\n","[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed: 10.7min\n","[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 12.0min\n","[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed: 13.3min\n","[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed: 13.4min\n","[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed: 14.7min\n","[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed: 16.0min\n","[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 16.1min\n","[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 17.4min\n","[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 17.4min\n","[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed: 18.7min\n","[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 18.8min\n","[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 20.1min\n","[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed: 20.1min\n","[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed: 21.5min\n","[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed: 21.5min\n","[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 22.9min\n","[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 22.9min\n","[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed: 24.2min\n","[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 24.2min\n","[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 25.6min\n","[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 25.6min\n","[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 26.9min\n","[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 27.0min\n","[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed: 28.3min\n","[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 28.3min\n","[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed: 29.6min\n","[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed: 29.7min\n","[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed: 31.1min\n","[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 31.1min\n","[Parallel(n_jobs=-1)]: Done  47 tasks      | elapsed: 32.4min\n","[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 32.5min\n","[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed: 33.8min\n","[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed: 33.8min\n","[Parallel(n_jobs=-1)]: Done  51 tasks      | elapsed: 35.1min\n","[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed: 35.2min\n","[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 36.5min\n","[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed: 36.5min\n","[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed: 37.9min\n","[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 37.9min\n","[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 39.2min\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.6min remaining:    0.0s\n","[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.6min finished\n"],"name":"stderr"},{"output_type":"stream","text":["Best params for roc_auc_score\n","{'class_weight': {1: 0.122, 0: 0.877}, 'learning_rate': 0.9, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500, 'scale_pos_weight': 0.9, 'subsample': 0.3}\n","\n","Confusion matrix of Random Forest optimized for roc_auc_score on the test data:\n","     pred_neg  pred_pos\n","neg     14661      1596\n","pos      1655       617\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"j5w1_DJPAhmL","executionInfo":{"status":"ok","timestamp":1605792736442,"user_tz":-330,"elapsed":873,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}}},"source":["\n","gsf=XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.9, seed=None,\n","              silent=None, subsample=0.3, verbosity=1)"],"execution_count":42,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wNEMuM_joOHW","executionInfo":{"status":"ok","timestamp":1605724341503,"user_tz":-330,"elapsed":2327,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"11c79f78-7e12-4b91-d859-4b32167c154a"},"source":["gsf.fit(xtrain[indx],ytrain)\n","preds = gsf.predict(xtest[indx])\n","roc_auc_score(ytest, preds)"],"execution_count":61,"outputs":[{"output_type":"execute_result","data":{"text/plain":["XGBClassifier(base_score=0.5, booster='gbtree',\n","              class_weight={0: 0.877, 1: 0.122}, colsample_bylevel=1,\n","              colsample_bynode=1, colsample_bytree=1, gamma=0,\n","              learning_rate=0.9, max_delta_step=0, max_depth=5,\n","              min_child_weight=10, missing=None, n_estimators=500, n_jobs=1,\n","              nthread=None, objective='binary:logistic', random_state=0,\n","              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.9, seed=None,\n","              silent=None, subsample=0.3, verbosity=1)"]},"metadata":{"tags":[]},"execution_count":61}]},{"cell_type":"code","metadata":{"id":"lOEniusYuToh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605793788954,"user_tz":-330,"elapsed":245146,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"e6b29b58-59c5-46b3-b147-188cd9096fc1"},"source":["from xgboost import XGBClassifier\n","xgb_model= XGBClassifier()\n","param_tuning = {\n","        'learning_rate': [0.25],\n","        \"class_weight\":[{0:0.877,1:0.123}],\n","        'max_depth': [3],\n","        'min_child_weight': [5],\n","        'subsample': [0.7],\n","        'colsample_bytree': [0.05,0.1,0.2,0.5, 0.7,0.9],\n","        'n_estimators' : [500] \n","}\n","\n","xgbsearch3 = GridSearchCV(estimator = xgb_model,\n","                           param_grid = param_tuning,                        \n","                           cv = 5,scoring ='roc_auc',\n","                           n_jobs = 20,\n","                           verbose = 30)\n","\n","xgbsearch3.fit(xtrain[indx],ytrain)\n","print(xgbsearch3.best_params_)\n","report(xgbsearch3.cv_results_,5)"],"execution_count":53,"outputs":[{"output_type":"stream","text":["Fitting 5 folds for each of 6 candidates, totalling 30 fits\n"],"name":"stdout"},{"output_type":"stream","text":["[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.\n","[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   27.2s\n","[Parallel(n_jobs=20)]: Done   3 out of  30 | elapsed:   27.4s remaining:  4.1min\n","[Parallel(n_jobs=20)]: Done   5 out of  30 | elapsed:   28.4s remaining:  2.4min\n","[Parallel(n_jobs=20)]: Done   7 out of  30 | elapsed:  1.9min remaining:  6.3min\n","[Parallel(n_jobs=20)]: Done   9 out of  30 | elapsed:  1.9min remaining:  4.5min\n","[Parallel(n_jobs=20)]: Done  11 out of  30 | elapsed:  2.2min remaining:  3.8min\n","[Parallel(n_jobs=20)]: Done  13 out of  30 | elapsed:  2.2min remaining:  2.9min\n","[Parallel(n_jobs=20)]: Done  15 out of  30 | elapsed:  2.2min remaining:  2.2min\n","[Parallel(n_jobs=20)]: Done  17 out of  30 | elapsed:  2.8min remaining:  2.2min\n","[Parallel(n_jobs=20)]: Done  19 out of  30 | elapsed:  2.8min remaining:  1.6min\n","[Parallel(n_jobs=20)]: Done  21 out of  30 | elapsed:  3.1min remaining:  1.3min\n","[Parallel(n_jobs=20)]: Done  23 out of  30 | elapsed:  3.7min remaining:  1.1min\n","[Parallel(n_jobs=20)]: Done  25 out of  30 | elapsed:  3.7min remaining:   44.7s\n","[Parallel(n_jobs=20)]: Done  27 out of  30 | elapsed:  3.8min remaining:   25.4s\n","[Parallel(n_jobs=20)]: Done  30 out of  30 | elapsed:  3.8min finished\n"],"name":"stderr"},{"output_type":"stream","text":["{'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.5, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","Model with rank: 1\n","Mean validation score: 0.727 (std: 0.00236)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.5, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n","Model with rank: 2\n","Mean validation score: 0.726 (std: 0.00308)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.7, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n","Model with rank: 3\n","Mean validation score: 0.725 (std: 0.00295)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.9, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n","Model with rank: 4\n","Mean validation score: 0.724 (std: 0.00399)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.2, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n","Model with rank: 5\n","Mean validation score: 0.715 (std: 0.00595)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.05, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n","Model with rank: 5\n","Mean validation score: 0.715 (std: 0.00595)\n","Parameters: {'class_weight': {0: 0.877, 1: 0.123}, 'colsample_bytree': 0.1, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 500, 'subsample': 0.7}\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"3jt68j_r1Hg6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1605797334880,"user_tz":-330,"elapsed":13030,"user":{"displayName":"Mansi Gupta","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg-eSqQt6zvClgSY0_QT_GIvO_LGNz3Cau1wv1S7w=s64","userId":"09741357658171476289"}},"outputId":"b67aae6e-aa5e-4662-f2f5-dd8d9e73a00c"},"source":["xgbfinal = xgbsearch3.best_estimator_\n","xgbfinal.fit(xtrain[indx],ytrain)\n","preds=xgbfinal.predict(xtest[indx])\n","print(confusion_matrix(ytest,preds))"],"execution_count":55,"outputs":[{"output_type":"stream","text":["[[16002   255]\n"," [ 1773   499]]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"-PYdQD4K3yVl"},"source":["\n"],"execution_count":null,"outputs":[]}]}