{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Vamos a trabajar con el dataset del titanic: https://www.kaggle.com/c/titanic/overview" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "titanic = pd.read_csv('titanic/train.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Información básica de las columnas:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", "PassengerId 891 non-null int64\n", "Survived 891 non-null int64\n", "Pclass 891 non-null int64\n", "Name 891 non-null object\n", "Sex 891 non-null object\n", "Age 714 non-null float64\n", "SibSp 891 non-null int64\n", "Parch 891 non-null int64\n", "Ticket 891 non-null object\n", "Fare 891 non-null float64\n", "Cabin 204 non-null object\n", "Embarked 889 non-null object\n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 83.6+ KB\n" ] } ], "source": [ "titanic.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Seleccionamos dos columnas para estudiar las odds ratio y las proporciones" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedSex
00male
11female
21female
31female
40male
\n", "
" ], "text/plain": [ " Survived Sex\n", "0 0 male\n", "1 1 female\n", "2 1 female\n", "3 1 female\n", "4 0 male" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt = titanic[['Survived','Sex']]\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Vamos a ver el porcentaje de mujeres y hombres:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "male 577\n", "female 314\n", "Name: Sex, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt['Sex'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "male 64.758698\n", "female 35.241302\n", "Name: Sex, dtype: float64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "100*dt['Sex'].value_counts()/len(dt['Sex'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Podemos hacer un bar plot:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEchJREFUeJzt3XuwXWV9xvHvA0GteAlIoDShjZeMl9oqcETUtoPiqFAr1BYvVYk007RTRB17kdpp7VStWm1VHIvNiBocq1LUEh2qpShaa7WeVOQiOqTokNNQORS5WEYc9Nc/9ht7DG+SnUPW2cdzvp+ZPWutd7177V8mK+fJ+6691klVIUnSrg6YdAGSpMXJgJAkdRkQkqQuA0KS1GVASJK6DAhJUpcBIUnqMiAkSV2DBkSSlUkuTPK1JNckeUKSQ5NckuTatjyk9U2Sc5JsS3JFkmOGrE2StGcZ8k7qJJuBf6mqdyW5F3Bf4FXAzVX1hiRnA4dU1SuTnAycBZwMPB54W1U9fk/HP+yww2rt2rWD1S9JS9HWrVtvqqpVe+s3WEAkeQDwFeAhNedDknwdOKGqbkhyJHBZVT08yd+29Q/s2m93nzE1NVXT09OD1C9JS1WSrVU1tbd+Q04xPQSYBd6T5MtJ3pXkYOCInT/02/Lw1n81sH3O+2da249IsjHJdJLp2dnZAcuXpOVtyIBYARwDnFtVRwP/C5y9h/7ptN1teFNVm6pqqqqmVq3a6whJkjRPQwbEDDBTVV9s2xcyCoxvtakl2vLGOf2PmvP+NcCOAeuTJO3BYAFRVf8NbE/y8NZ0IvBVYAuwvrWtBy5q61uA09u3mY4Hbt3T9QdJ0rBWDHz8s4D3t28wXQecwSiULkiyAbgeOK31vZjRN5i2AXe0vpKkCRk0IKrqcqB3pfzETt8CzhyyHknS+LyTWpLUZUBIkroMCElS19AXqRe9Y//g/EmXoEVo65tOn3QJ0sQ5gpAkdRkQkqQuA0KS1GVASJK6DAhJUpcBIUnqMiAkSV0GhCSpy4CQJHUZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhSeoyICRJXYMGRJJvJrkyyeVJplvboUkuSXJtWx7S2pPknCTbklyR5Jgha5Mk7dlCjCCeXFWPraqptn02cGlVrQMubdsAJwHr2msjcO4C1CZJ2o1JTDGdAmxu65uBU+e0n18jXwBWJjlyAvVJkhg+IAr4pyRbk2xsbUdU1Q0AbXl4a18NbJ/z3pnW9iOSbEwynWR6dnZ2wNIlaXlbMfDxn1RVO5IcDlyS5Gt76JtOW92toWoTsAlgamrqbvslSfvHoCOIqtrRljcCHwWOA761c+qoLW9s3WeAo+a8fQ2wY8j6JEm7N1hAJDk4yf13rgNPA64CtgDrW7f1wEVtfQtwevs20/HArTunoiRJC2/IKaYjgI8m2fk5f1dVn0jyJeCCJBuA64HTWv+LgZOBbcAdwBkD1iZJ2ovBAqKqrgMe02n/H+DETnsBZw5VjyRp33gntSSpy4CQJHUZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhSeoyICRJXQaEJKnLgJAkdRkQkqQuA0KS1GVASJK6DAhJUpcBIUnqMiAkSV0GhCSpy4CQJHUZEJKkrsEDIsmBSb6c5ONt+8FJvpjk2iQfSnKv1n7vtr2t7V87dG2SpN1biBHEy4Br5my/EXhLVa0Dvg1saO0bgG9X1cOAt7R+kqQJGTQgkqwBfhl4V9sO8BTgwtZlM3BqWz+lbdP2n9j6S5ImYOgRxFuBPwR+0LYfBNxSVXe17RlgdVtfDWwHaPtvbf1/RJKNSaaTTM/Ozg5ZuyQta4MFRJJnAjdW1da5zZ2uNca+/2+o2lRVU1U1tWrVqv1QqSSpZ8W
Ax34S8KwkJwP3AR7AaESxMsmKNkpYA+xo/WeAo4CZJCuABwI3D1ifJGkPBhtBVNUfVdWaqloLPA/4VFW9APg08Out23rgora+pW3T9n+qqu42gpAkLYxJ3AfxSuAVSbYxusZwXms/D3hQa38FcPYEapMkNUNOMf1QVV0GXNbWrwOO6/T5LnDaQtQjSdo776SWJHUZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhSeoyICRJXQaEJKnLgJAkdY0VEEkuHadNkrR07PFXjia5D3Bf4LAkhwBpux4A/NTAtUmSJmhvv5P6t4GXMwqDrfx/QNwGvGPAuiRJE7bHgKiqtwFvS3JWVb19gWqSJC0CextBAFBVb0/yRGDt3PdU1fkD1SVJmrCxAiLJ+4CHApcD32/NBRgQkrREjRUQwBTwqKqqIYuRJC0e494HcRXwk0MWIklaXMYdQRwGfDXJvwN37mysqmcNUpUkrv/zn5t0CVqEfvpPr1ywzxo3IP5sXw/c7qH4LHDv9jkXVtWrkzwY+CBwKPAfwIuq6ntJ7s3omsaxwP8Az62qb+7r50qS9o9xv8X0mXkc+07gKVX1nSQHAZ9L8o/AK4C3VNUHk7wT2ACc25bfrqqHJXke8EbgufP4XEnSfjDuozZuT3Jbe303yfeT3Lan99TId9rmQe1VwFOAC1v7ZuDUtn5K26btPzHJzhvzJEkLbNwRxP3nbic5FThub+9LciCjO7AfxujO6/8Ebqmqu1qXGWB1W18NbG+fd1eSW4EHATeNU6Mkaf+a19Ncq+ofGI0E9tbv+1X1WGANo0B5ZK9bW/ZGC3f7Wm2SjUmmk0zPzs7uQ9WSpH0x7o1yz56zeQCj+yLGvieiqm5JchlwPLAyyYo2ilgD7GjdZoCjgJkkK4AHAjd3jrUJ2AQwNTXlfRmSNJBxRxC/Muf1dOB2RtcMdivJqiQr2/pPAE8FrgE+Dfx667YeuKitb2nbtP2f8sY8SZqcca9BnDGPYx8JbG7XIQ4ALqiqjyf5KvDBJK8Fvgyc1/qfB7wvyTZGI4fnzeMzJUn7ybhTTGuAtwNPYjS19DngZVU1s7v3VNUVwNGd9uvoXOCuqu8Cp41XtiRpaONOMb2H0RTQTzH6ttHHWpskaYkaNyBWVdV7ququ9novsGrAuiRJEzZuQNyU5IVJDmyvFzJ6HIYkaYkaNyB+E3gO8N/ADYy+ZTSfC9eSpB8T4z6s7zXA+qr6NkCSQ4E3MwoOSdISNO4I4ud3hgNAVd1M5xtKkqSlY9yAOCDJITs32ghi3NGHJOnH0Lg/5P8K+HySCxndB/Ec4HWDVSVJmrhx76Q+P8k0owf0BXh2VX110MokSRM19jRRCwRDQZKWiXk97luStPQZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhSeoyICRJXQaEJKnLgJAkdQ0WEEmOSvLpJNckuTrJy1r7oUkuSXJtWx7S2pPknCTbklyR5JihapMk7d2QI4i7gN+rqkcCxwNnJnkUcDZwaVWtAy5t2wAnAevaayNw7oC1SZL2YrCAqKobquo/2vrtwDXAauAUYHPrthk4ta2fApxfI18AViY5cqj6JEl7tiDXIJKsBY4GvggcUVU3wChEgMNbt9XA9jlvm2ltkqQJGDwgktwP+DDw8qq6bU9dO23VOd7GJNNJpmdnZ/dXmZKkXQwaEEkOYhQO76+qj7Tmb+2cOmrLG1v7DHDUnLevAXbsesyq2lRVU1U1tWrVquGKl6RlbshvMQU4D7imqv56zq4twPq2vh64aE776e3bTMcDt+6cipIkLbwVAx77ScCLgCuTXN7aXgW8AbggyQbgeuC0tu9i4GRgG3AHcMaAtUmS9mKwgKiqz9G/rgBwYqd/AWcOVY8kad94J7UkqcuAkCR1GRCSpC4
DQpLUZUBIkroMCElSlwEhSeoyICRJXQaEJKnLgJAkdRkQkqQuA0KS1GVASJK6DAhJUpcBIUnqMiAkSV0GhCSpy4CQJHUZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpK7BAiLJu5PcmOSqOW2HJrkkybVteUhrT5JzkmxLckWSY4aqS5I0niFHEO8FnrFL29nApVW1Dri0bQOcBKxrr43AuQPWJUkaw2ABUVWfBW7epfkUYHNb3wycOqf9/Br5ArAyyZFD1SZJ2ruFvgZxRFXdANCWh7f21cD2Of1mWpskaUIWy0XqdNqq2zHZmGQ6yfTs7OzAZUnS8rXQAfGtnVNHbXlja58BjprTbw2wo3eAqtpUVVNVNbVq1apBi5Wk5WyhA2ILsL6trwcumtN+evs20/HArTunoiRJk7FiqAMn+QBwAnBYkhng1cAbgAuSbACuB05r3S8GTga2AXcAZwxVlyRpPIMFRFU9fze7Tuz0LeDMoWqRJO27xXKRWpK0yBgQkqQuA0KS1GVASJK6DAhJUpcBIUnqMiAkSV0GhCSpy4CQJHUZEJKkLgNCktRlQEiSugwISVKXASFJ6jIgJEldBoQkqcuAkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhSeoyICRJXQaEJKnLgJAkdRkQkqQuA0KS1LWoAiLJM5J8Pcm2JGdPuh5JWs4WTUAkORB4B3AS8Cjg+UkeNdmqJGn5WjQBARwHbKuq66rqe8AHgVMmXJMkLVuLKSBWA9vnbM+0NknSBKyYdAFzpNNWd+uUbAQ2ts3vJPn6oFUtL4cBN026iMUgb14/6RL0ozw3d3p170flPvuZcTotpoCYAY6as70G2LFrp6raBGxaqKKWkyTTVTU16TqkXXluTsZimmL6ErAuyYOT3At4HrBlwjVJ0rK1aEYQVXVXkpcAnwQOBN5dVVdPuCxJWrYWTUAAVNXFwMWTrmMZc+pOi5Xn5gSk6m7XgSVJWlTXICRJi4gBoa4kJyT5+KTr0NKQ5KVJrkny/oGO/2dJfn+IYy9ni+oahKQl63eBk6rqG5MuRONzBLGEJVmb5GtJ3pXkqiTvT/LUJP+a5Nokx7XX55N8uS0f3jnOwUneneRLrZ+PQNHYkrwTeAiwJckf986lJC9O8g9JPpbkG0lekuQVrc8Xkhza+v1We+9Xknw4yX07n/fQJJ9IsjXJvyR5xML+iZcOA2LpexjwNuDngUcAvwH8AvD7wKuArwG/VFVHA38K/EXnGH8MfKqqHgc8GXhTkoMXoHYtAVX1O4xuen0ycDC7P5cezej8PA54HXBHOy//DTi99flIVT2uqh4DXANs6HzkJuCsqjqW0Xn+N8P8yZY+p5iWvm9U1ZUASa4GLq2qSnIlsBZ4ILA5yTpGjzY5qHOMpwHPmjPHex/gpxn9A5X2xe7OJYBPV9XtwO1JbgU+1tqvZPQfHIBHJ3ktsBK4H6P7pn4oyf2AJwJ/n/zwkRT3HuIPshwYEEvfnXPWfzBn+weM/v5fw+gf5q8mWQtc1jlGgF+rKp97pXuqey4leTx7P1cB3gucWlVfSfJi4IRdjn8AcEtVPXb/lr08OcWkBwL/1dZfvJs+nwTOSvsvWZKjF6AuLU339Fy6P3BDkoOAF+y6s6puA76R5LR2/CR5zD2sedkyIPSXwOuT/CujR5z0vIbR1NMVSa5q29J83NNz6U+ALwKXMLp+1vMCYEOSrwBX4++VmTfvpJYkdTmCkCR1GRCSpC4DQpLUZUBIkroMCElSlwEhzVN7rtDVSa5Icnm72UtaMryTWpqHJE8AngkcU1V3JjkMuNeEy5L2K0cQ0vwcCdxUVXcCVNVNVbUjybFJPtOeJPrJJEcmWdGeQHoCQJLXJ3ndJIuXxuGNctI8tIfCfQ64L/DPwIeAzwOfAU6pqtkkzwWeXlW/meRngQuBlzK6e/3xVfW9yVQvjccpJmkequo7SY4FfpHRY6s/BLy
W0SOrL2mPGjoQuKH1vzrJ+xg9ofQJhoN+HBgQ0jxV1fcZPf32svb49DOBq6vqCbt5y88BtwBHLEyF0j3jNQhpHpI8vP0OjZ0ey+j3Y6xqF7BJclCbWiLJs4EHAb8EnJNk5ULXLO0rr0FI89Cml97O6BfX3AVsAzYCa4BzGD1GfQXwVuCjjK5PnFhV25O8FDi2qtZPonZpXAaEJKnLKSZJUpcBIUnqMiAkSV0GhCSpy4CQJHUZEJKkLgNCktRlQEiSuv4PoyuAlRWoZYIAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.countplot(dt['Sex'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Podemos sacar una tabla como la de antes:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SexfemalemaleAll
Survived
081468549
1233109342
All314577891
\n", "
" ], "text/plain": [ "Sex female male All\n", "Survived \n", "0 81 468 549\n", "1 233 109 342\n", "All 314 577 891" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(index=dt['Survived'],columns=dt['Sex'], margins=True) # margins para sacar los totales" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Podemos hacer la tabla con las proporciones/probabilidades de cada clase:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SexfemalemaleAll
Survived
09.09090952.52525361.616162
126.15039312.23344638.383838
All35.24130264.758698100.000000
\n", "
" ], "text/plain": [ "Sex female male All\n", "Survived \n", "0 9.090909 52.525253 61.616162\n", "1 26.150393 12.233446 38.383838\n", "All 35.241302 64.758698 100.000000" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(index=dt['Survived'],columns=dt['Sex'], margins=True).apply(lambda r: r*100/len(dt))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Igual pero relativo por cada fila:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Sexfemalemale
Survived
014.75409885.245902
168.12865531.871345
\n", "
" ], "text/plain": [ "Sex female male\n", "Survived \n", "0 14.754098 85.245902\n", "1 68.128655 31.871345" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(index=dt['Survived'],columns=dt['Sex']).apply(lambda r: r*100/r.sum(), axis=1)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.31871345029239767" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# De los que sobreviven cuántos son hombres\n", "109/342" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Relativo por columnas:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Sexfemalemale
Survived
025.79617881.109185
174.20382218.890815
\n", "
" ], "text/plain": [ "Sex female male\n", "Survived \n", "0 25.796178 81.109185\n", "1 74.203822 18.890815" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(index=dt['Survived'],columns=dt['Sex']).apply(lambda r: r*100/r.sum(), axis=0)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.18890814558058924" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# De los hombres, cuántos sobreviven\n", "109/577" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Podemos hacer otro barplot:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEGCAYAAAB1iW6ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFbpJREFUeJzt3X+QVeWd5/H3FxAZBEWkpRBMYAfiD0LQ2GhGqzYoGpKZUaxsTMi6FkYTNv5AySblkKgVUolZrbLMJtmoaUcHtDQq+AOKJI6KEM2GRUFQMIyDOkR7ZLBlAxFnEBq/+0dfDGLjvd19b1/68H5VUefHfc4539vVfPrpp899TmQmkqSer1e9C5AkVYeBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVRJ/uvNiQIUNy5MiR3XlJSerxVq5c+WZmNpRr162BPnLkSFasWNGdl5SkHi8i/lBJO4dcJKkgDHRJKggDXZIKolvH0CUJYOfOnTQ3N7N9+/Z6l7Jf6devHyNGjOCggw7q1PEGuqRu19zczMCBAxk5ciQRUe9y9guZyebNm2lubmbUqFGdOodDLpK63fbt2zniiCMM8z1EBEcccUSXfmsx0CXVhWH+QV39mhjoklQQFY2hR8Q3gK8CCawBvgIMA+4FBgPPAhdk5o4a1an2zD6s3hWUN3trvSuQuO6667jnnnvo3bs3vXr14uc//zmnnHJKvcuqurKBHhHDgSuA4zPzPyLifmAq8NfAjzLz3oi4FbgYuKWm1UpSBy1btoxFixbx7LPPcvDBB/Pmm2+yY0cx+56VDrn0Af4iIvoA/YGNwBnA/NLrc4Fzq1+eJHXNxo0bGTJkCAcffDAAQ4YM4aijjmLlypV8+tOf5qSTTmLy5Mls3LiR1tZWJkyYwNKlSwH49re/zdVXX13H6jumbKBn5r8CNwKv0hbkW4GVwJbMbC01awaGt3d8REyPiBURsaKlpaU6VUtShT7zmc/w2muv8bGPfYxLL72U3/zmN+zcuZMZM2Ywf/58Vq5cyUUXXcTVV19Nnz59mDNnDpdccgmPPfYYjzzyCN/97nfr/RYqVsmQy+HAFGAUs
AWYB3yunabZ3vGZ2QQ0ATQ2NrbbRpJqZcCAAaxcuZKnnnqKJUuW8KUvfYlrrrmGtWvXctZZZwGwa9cuhg0bBsDYsWO54IILOPvss1m2bBl9+/atZ/kdUskfRc8E/iUzWwAi4kHgVGBQRPQp9dJHAK/XrkxJ6rzevXszceJEJk6cyLhx4/jZz37G2LFjWbZsWbvt16xZw6BBg9i0aVM3V9o1lYyhvwp8KiL6R9tNkpOA3wNLgC+U2kwDFtSmREnqvBdffJH169e/t7169WqOO+44Wlpa3gv0nTt38sILLwDw4IMPsnnzZp588kmuuOIKtmzZUpe6O6NsDz0zl0fEfNpuTWwFVtE2hPJL4N6I+EFp3+21LFSSOmPbtm3MmDGDLVu20KdPH0aPHk1TUxPTp0/niiuuYOvWrbS2tjJz5kyGDh3KrFmzWLx4MUcffTSXX345V155JXPnzq3326hIZHbfsHZjY2P6gIsq8j509VDr1q3juOOOq3cZ+6X2vjYRsTIzG8sd6ydFJakgDHRJKggDXZIKwkCXpIIw0CWpIAx0SSoIH0Enqe5GzvplVc+34fq/KdvmJz/5Cbfccguf/OQnufvuu6t6fYDZs2czYMAAvvWtb1X93PtioEs6IN188838+te/7vTzO/dHBrqkA87Xv/51XnnlFc455xymTp3Kyy+/zJo1a2htbWX27NlMmTKFOXPm8PDDD7Nr1y7Wrl3LN7/5TXbs2MFdd93FwQcfzK9+9SsGDx7MbbfdRlNTEzt27GD06NHcdddd9O/f/33Xe/nll7nssstoaWmhf//+3HbbbRx77LFVf18GutSDVHtoolYqGfKop1tvvZVHHnmEJUuWcNNNN3HGGWdwxx13sGXLFk4++WTOPPNMANauXcuqVavYvn07o0eP5oYbbmDVqlV84xvf4M4772TmzJl8/vOf52tf+xoA11xzDbfffjszZsx43/WmT5/OrbfeypgxY1i+fDmXXnopTzzxRNXfl4Eu6YD26KOPsnDhQm688UYAtm/fzquvvgrA6aefzsCBAxk4cCCHHXYYZ599NgDjxo3j+eefB9pC/5prrmHLli1s27aNyZMnv+/827Zt43e/+x3nnXfee/veeeedmrwXA13SAS0zeeCBBzjmmGPet3/58uXvPeUIoFevXu9t9+rVi9bWtuf7XHjhhTz88MOMHz+eOXPmvPe0o93effddBg0axOrVq2v7RvC2RUkHuMmTJ/PTn/6U3RMVrlq1qkPHv/XWWwwbNoydO3e2e7fMoYceyqhRo5g3bx7Q9gPkueee63rh7bCHLqnu6jnmfu211zJz5kw+8YlPkJmMHDmSRYsWVXz897//fU455RQ++tGPMm7cON56660PtLn77ru55JJL+MEPfsDOnTuZOnUq48ePr+bbAJw+t2dz+twDTlH+KOr0ufvm9LmSpPKBHhHHRMTqPf79KSJmRsTgiHgsItaXlod3R8GSpPaVDfTMfDEzT8jME4CTgH8HHgJmAYszcwywuLQtSaqTjg65TAJezsw/AFOA3Q/amwucW83CJEkd09FAnwr8orQ+NDM3ApSWR1azMElSx1Qc6BHRFzgHmNeRC0TE9IhYERErWlpaOlqfJKlCHbkP/XPAs5m5qbS9KSKGZebGiBgGvNHeQZnZBDRB222LXapWUjFV+xbcGt8uu3TpUm688cYO3a/eHToy5PJl/jzcArAQmFZanwYsqFZRkqSOqyjQI6I/cBbw4B67rwfOioj1pdeur355klQbGzZs4Nhjj+WrX/0qH//4xzn//PN5/PHHOe200xgzZgxPP/00Tz/9NKeeeionnngip556Ki+++OIHzvP2229z0UUXMWHCBE488UQWLKhf37aiIZfM/HfgiL32babtrhdJ6pFeeukl5s2bR1NTExMmTOCee+7ht7/9LQsXLuSHP/whd955J08++SR9+vTh8ccf5zvf+Q4PPPDA+85x3XXXtTv97iGHHNLt78e5X
CQdsEaNGsW4ceMAGDt2LJMmTSIiGDduHBs2bGDr1q1MmzaN9evXExHs3LnzA+fY1/S79ZjawECXdMAqNz3utddey+mnn85DDz3Ehg0bmDhx4gfOsa/pd+vBuVwkaR+2bt3K8OHDAZgzZ067bbo6/W412UOXVH/76aycV111FdOmTXvvMXXt6er0u9Xk9Lk9mdPnHnCcPrf4nD5XkmSgS1JRGOiS6qI7h3t7iq5+TQx0Sd2uX79+bN682VDfQ2ayefNm+vXr1+lzeJeLpG43YsQImpubcQbW9+vXrx8jRozo9PEGuqRud9BBBzFq1Kh6l1E4DrlIUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBCVPoJuUETMj4h/ioh1EfFXETE4Ih6LiPWl5eG1LlaStG+V9tB/DDySmccC44F1wCxgcWaOARaXtiVJdVI20CPiUOA/A7cDZOaOzNwCTAHmlprNBc6tVZGSpPIq6aH/J6AF+IeIWBURfx8RhwBDM3MjQGl5ZHsHR8T0iFgRESv8mK8k1U4lgd4H+CRwS2aeCLxNB4ZXMrMpMxszs7GhoaGTZUqSyqkk0JuB5sxcXtqeT1vAb4qIYQCl5Ru1KVGSVImygZ6Z/wa8FhG7H2k9Cfg9sBCYVto3DVhQkwolSRWpdLbFGcDdEdEXeAX4Cm0/DO6PiIuBV4HzalOiJKkSFQV6Zq4G2ntA6aTqliNJ6iw/KSpJBWGgS1JBGOiSVBAGuiQVhIEuSQVhoEtSQRjoklQQBrokFYSBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVREUPuIiIDcBbwC6gNTMbI2IwcB8wEtgAfDEz/1ibMiVJ5XSkh356Zp6QmbufXDQLWJyZY4DFpW1JUp10ZchlCjC3tD4XOLfr5UiSOqvSQE/g0YhYGRHTS/uGZuZGgNLyyFoUKEmqTEVj6MBpmfl6RBwJPBYR/1TpBUo/AKYDfOQjH+lEiZKkSlTUQ8/M10vLN4CHgJOBTRExDKC0fGMfxzZlZmNmNjY0NFSnaknSB5QN9Ig4JCIG7l4HPgOsBRYC00rNpgELalWkJKm8SoZchgIPRcTu9vdk5iMR8Qxwf0RcDLwKnFe7MiVJ5ZQN9Mx8BRjfzv7NwKRaFCVJ6jg/KSpJBWGgS1JBGOiSVBAGuiQVhIEuSQVhoEtSQRjoklQQBrokFYSBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVhIEuSQVRcaBHRO+IWBURi0rboyJieUSsj4j7IqJv7cqUJJXTkR76lcC6PbZvAH6UmWOAPwIXV7MwSVLHVBToETEC+Bvg70vbAZwBzC81mQucW4sCJUmVqbSH/r+Aq4B3S9tHAFsys7W03QwMb+/AiJgeESsiYkVLS0uXipUk7VvZQI+IvwXeyMyVe+5up2m2d3xmNmVmY2Y2NjQ0dLJMSVI5fSpocxpwTkT8NdAPOJS2HvugiOhT6qWPAF6vXZmSpHLK9tAz89uZOSIzRwJTgScy83xgCfCFUrNpwIKaVSlJKqsr96H/HfA/IuIl2sbUb69OSZKkzqhkyOU9mbkUWFpafwU4ufolSZI6w0+KSlJBGOiSVBAGuiQVhIEuSQVhoEtSQRjoklQQBrokFYSBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVhIEuSQVhoEtSQZQN9IjoFxFPR8RzEfFCRHyvtH9URCyPiPURcV9E9K19uZKkfamkh/4OcEZmjgdOAD4bEZ8CbgB+lJljgD8CF9euTElSOWUDPdtsK20eVPqXwBnA/NL+ucC5NalQklSRisbQI6J3RKwG3gAeA14GtmRma6lJMzB8H8dOj4gVEbGipaWlGjVLktpRUaBn5q7MPAEYAZwMHNdes30c25SZjZnZ2NDQ0PlKJUkfqkN3uWTmFmAp8ClgUET0Kb00Ani9uqVJkjqikrtcGiJiU
Gn9L4AzgXXAEuALpWbTgAW1KlKSVF6f8k0YBsyNiN60/QC4PzMXRcTvgXsj4gfAKuD2GtYpSSqjbKBn5vPAie3sf4W28XRJ0n7AT4pKUkEY6JJUEAa6JBWEgS5JBWGgS1JBVHLb4gFn5Kxf1ruEimzoV+8KJO1P7KFLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVhIEuSQVhoEtSQRjoklQQlTyC7uiIWBIR6yLihYi4srR/cEQ8FhHrS8vDa1+uJGlfKumhtwLfzMzjaHs49GURcTwwC1icmWOAxaVtSVKdlA30zNyYmc+W1t+i7QHRw4EpwNxSs7nAubUqUpJUXofG0CNiJG3PF10ODM3MjdAW+sCR+zhmekSsiIgVLS0tXatWkrRPFQd6RAwAHgBmZuafKj0uM5syszEzGxsaGjpToySpAhUFekQcRFuY352ZD5Z2b4qIYaXXhwFv1KZESVIlyj7gIiICuB1Yl5k37fHSQmAacH1puaAmFUrqeWYfVu8KKjN7a70rqKpKnlh0GnABsCYiVpf2fYe2IL8/Ii4GXgXOq02JkqRKlA30zPwtEPt4eVJ1y5EkdZafFJWkgjDQJakgDHRJKggDXZIKwkCXpIIw0CWpIAx0SSoIA12SCsJAl6SCMNAlqSAMdEkqCANdkgrCQJekgjDQJakgDHRJKggDXZIKwkCXpIIoG+gRcUdEvBERa/fYNzgiHouI9aXl4bUtU5JUTiU99DnAZ/faNwtYnJljgMWlbUlSHZUN9Mx8Evh/e+2eAswtrc8Fzq1yXZKkDursGPrQzNwIUFoeua+GETE9IlZExIqWlpZOXk6SVE7N/yiamU2Z2ZiZjQ0NDbW+nCQdsDob6JsiYhhAaflG9UqSJHVGZwN9ITCttD4NWFCdciRJnVXJbYu/AJYBx0REc0RcDFwPnBUR64GzStuSpDrqU65BZn55Hy9NqnItkqQu8JOiklQQBrokFYSBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JBGOiSVBAGuiQVhIEuSQVhoEtSQRjoklQQBrokFYSBLkkFYaBLUkF0KdAj4rMR8WJEvBQRs6pVlCSp4zod6BHRG/gZ8DngeODLEXF8tQqTJHVMV3roJwMvZeYrmbkDuBeYUp2yJEkdVfaZoh9iOPDaHtvNwCl7N4qI6cD00ua2iHixC9fUHgKGAG/Wu44P9b2odwWqgx7xvQk96fvzo5U06kqgt/eVyA/syGwCmrpwHe1DRKzIzMZ61yHtze/N+ujKkEszcPQe2yOA17tWjiSps7oS6M8AYyJiVET0BaYCC6tTliSpozo95JKZrRFxOfCPQG/gjsx8oWqVqRIOZWl/5fdmHUTmB4a9JUk9kJ8UlaSCMNAlqSAMdEkqiK7ch65uFBHH0vZJ3OG03e//OrAwM9fVtTBJ+w176D1ARPwdbVMrBPA0bbeMBvALJ0WTtJt3ufQAEfHPwNjM3LnX/r7AC5k5pj6VSR8uIr6Smf9Q7zoOFPbQe4Z3gaPa2T+s9Jq0v/pevQs4kDiG3jPMBBZHxHr+PCHaR4DRwOV1q0oCIuL5fb0EDO3OWg50Drn0EBHRi7Ypi4fT9h+lGXgmM3fVtTAd8CJiEzAZ+OPeLwG/y8z2frtUDdhD7yEy813g/9a7Dqkdi4ABmbl67xciYmn3l3PgsocuSQXhH0UlqSAMdEkqCANdPVJEXB0RL0TE8xGxOiI+8PjDTpzznGp9UCsitlXjPFJHOIauHici/gq4CZiYme9ExBCgb2aWfWJWRPTJzNZuqHFbZg6o9XWkPdlDV080DHgzM98ByMw3M/P1iNhQCncionH3HRYRMTsimiLiUeDOiFgeEWN3nywilkbESRFxYUT874g4rHSuXqXX+0fEaxFxUET8ZUQ8EhErI+Kp0hw7lJ7ctSwinomI73fz10MCDHT1TI8CR0fEP0fEzRHx6QqOOQmYkpn/lbZ5cb4IEBHDgKMyc+Xuh
pm5FXgO2H3es4F/LE290ATMyMyTgG8BN5fa/Bi4JTMnAP/W5XcodYKBrh4nM7fRFtDTgRbgvoi4sMxhCzPzP0rr9wPnlda/CMxrp/19wJdK61NL1xgAnArMi4jVwM9p+20B4DTgF6X1uzr0hqQq8YNF6pFKn5BdCiyNiDXANKCVP3dS+u11yNt7HPuvEbE5Ij5BW2j/93YusRD4nxExmLYfHk8AhwBbMvOEfZXVybcjVYU9dPU4EXFMROw5w+QJwB+ADbSFL8B/KXOae4GrgMMyc83eL5Z+C3iatqGURZm5KzP/BPxLRJxXqiMiYnzpkP9DW08e4PyOvyup6wx09UQDgLkR8fvSxFDHA7Npm9nvxxHxFFBujpv5tAXw/R/S5j7gv5WWu50PXBwRzwEv0PbQEYArgcsi4hngsI69Hak6vG1RkgrCHrokFYSBLkkFYaBLUkEY6JJUEAa6JBWEgS5JBWGgS1JB/H8dqSCI1PCa/wAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pd.crosstab(index=dt['Survived'],columns=dt['Sex']).apply(lambda r: r*100/r.sum(), axis=0).plot(kind='bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Vamos a calcular odds y proporciones:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7420382165605095" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p_mujer_vive=233/314\n", "p_mujer_vive" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "p_mujer_muere=1-p_mujer_vive" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "p_hombre_vive=109/577\n", "p_hombre_muere=1-p_hombre_vive" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.876543209876542" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Odds mujer\n", "odds_mujer=p_mujer_vive/p_mujer_muere\n", "odds_mujer" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.23290598290598288" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Odds hombre\n", "odds_hombre=p_hombre_vive/p_hombre_muere\n", "odds_hombre" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.08096731594585674" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Odds ratio\n", "odds_ratio=odds_hombre/odds_mujer\n", "odds_ratio" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12.350662589194696" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "odds_mujer/odds_hombre" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "Hay funciones que calculan los odds ratio (pero no entraremos demasiado)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.08096731594585672" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import scipy.stats as stats\n", "table=pd.crosstab(index=dt['Survived'],columns=dt['Sex'])\n", "oddsratio, pvalue =stats.fisher_exact(table)\n", "\n", "oddsratio" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### El modelo:" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedSex
00male
11female
21female
31female
40male
\n", "
" ], "text/plain": [ " Survived Sex\n", "0 0 male\n", "1 1 female\n", "2 1 female\n", "3 1 female\n", "4 0 male" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Para transformar la variable Sex en unos y ceros usamos `get_dummies`:" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedSex_femaleSex_male
0001
1110
2110
3110
4001
\n", "
" ], "text/plain": [ " Survived Sex_female Sex_male\n", "0 0 0 1\n", "1 1 1 0\n", "2 1 1 0\n", "3 1 1 0\n", "4 0 0 1" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt=pd.get_dummies(dt)\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Como es redundante tiramos una:" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedSex_male
001
110
210
310
401
\n", "
" ], "text/plain": [ " Survived Sex_male\n", "0 0 1\n", "1 1 0\n", "2 1 0\n", "3 1 0\n", "4 0 1" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt=dt.drop('Sex_female',axis=1)\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hagamos la regresión logística:" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on class LogisticRegression in module sklearn.linear_model.logistic:\n", "\n", "class LogisticRegression(sklearn.base.BaseEstimator, sklearn.linear_model.base.LinearClassifierMixin, sklearn.linear_model.base.SparseCoefMixin)\n", " | LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='warn', max_iter=100, multi_class='warn', verbose=0, warm_start=False, n_jobs=None)\n", " | \n", " | Logistic Regression (aka logit, MaxEnt) classifier.\n", " | \n", " | In the multiclass case, the training algorithm uses the one-vs-rest (OvR)\n", " | scheme if the 'multi_class' option is set to 'ovr', and uses the cross-\n", " | entropy loss if the 'multi_class' option is set to 'multinomial'.\n", " | (Currently the 'multinomial' option is supported only by the 'lbfgs',\n", " | 'sag' and 'newton-cg' solvers.)\n", " | \n", " | This class implements regularized logistic regression using the\n", " | 'liblinear' library, 'newton-cg', 'sag' and 'lbfgs' solvers. It can handle\n", " | both dense and sparse input. 
Use C-ordered arrays or CSR matrices\n", " | containing 64-bit floats for optimal performance; any other input format\n", " | will be converted (and copied).\n", " | \n", " | The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\n", " | with primal formulation. The 'liblinear' solver supports both L1 and L2\n", " | regularization, with a dual formulation only for the L2 penalty.\n", " | \n", " | Read more in the :ref:`User Guide `.\n", " | \n", " | Parameters\n", " | ----------\n", " | penalty : str, 'l1' or 'l2', default: 'l2'\n", " | Used to specify the norm used in the penalization. The 'newton-cg',\n", " | 'sag' and 'lbfgs' solvers support only l2 penalties.\n", " | \n", " | .. versionadded:: 0.19\n", " | l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n", " | \n", " | dual : bool, default: False\n", " | Dual or primal formulation. Dual formulation is only implemented for\n", " | l2 penalty with liblinear solver. Prefer dual=False when\n", " | n_samples > n_features.\n", " | \n", " | tol : float, default: 1e-4\n", " | Tolerance for stopping criteria.\n", " | \n", " | C : float, default: 1.0\n", " | Inverse of regularization strength; must be a positive float.\n", " | Like in support vector machines, smaller values specify stronger\n", " | regularization.\n", " | \n", " | fit_intercept : bool, default: True\n", " | Specifies if a constant (a.k.a. bias or intercept) should be\n", " | added to the decision function.\n", " | \n", " | intercept_scaling : float, default 1.\n", " | Useful only when the solver 'liblinear' is used\n", " | and self.fit_intercept is set to True. In this case, x becomes\n", " | [x, self.intercept_scaling],\n", " | i.e. a \"synthetic\" feature with constant value equal to\n", " | intercept_scaling is appended to the instance vector.\n", " | The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n", " | \n", " | Note! 
the synthetic feature weight is subject to l1/l2 regularization\n", " | as all other features.\n", " | To lessen the effect of regularization on synthetic feature weight\n", " | (and therefore on the intercept) intercept_scaling has to be increased.\n", " | \n", " | class_weight : dict or 'balanced', default: None\n", " | Weights associated with classes in the form ``{class_label: weight}``.\n", " | If not given, all classes are supposed to have weight one.\n", " | \n", " | The \"balanced\" mode uses the values of y to automatically adjust\n", " | weights inversely proportional to class frequencies in the input data\n", " | as ``n_samples / (n_classes * np.bincount(y))``.\n", " | \n", " | Note that these weights will be multiplied with sample_weight (passed\n", " | through the fit method) if sample_weight is specified.\n", " | \n", " | .. versionadded:: 0.17\n", " | *class_weight='balanced'*\n", " | \n", " | random_state : int, RandomState instance or None, optional, default: None\n", " | The seed of the pseudo random number generator to use when shuffling\n", " | the data. If int, random_state is the seed used by the random number\n", " | generator; If RandomState instance, random_state is the random number\n", " | generator; If None, the random number generator is the RandomState\n", " | instance used by `np.random`. 
Used when ``solver`` == 'sag' or\n", " | 'liblinear'.\n", " | \n", " | solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default: 'liblinear'.\n", " | \n", " | Algorithm to use in the optimization problem.\n", " | \n", " | - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n", " | 'saga' are faster for large ones.\n", " | - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n", " | handle multinomial loss; 'liblinear' is limited to one-versus-rest\n", " | schemes.\n", " | - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n", " | 'liblinear' and 'saga' handle L1 penalty.\n", " | \n", " | Note that 'sag' and 'saga' fast convergence is only guaranteed on\n", " | features with approximately the same scale. You can\n", " | preprocess the data with a scaler from sklearn.preprocessing.\n", " | \n", " | .. versionadded:: 0.17\n", " | Stochastic Average Gradient descent solver.\n", " | .. versionadded:: 0.19\n", " | SAGA solver.\n", " | .. versionchanged:: 0.20\n", " | Default will change from 'liblinear' to 'lbfgs' in 0.22.\n", " | \n", " | max_iter : int, default: 100\n", " | Useful only for the newton-cg, sag and lbfgs solvers.\n", " | Maximum number of iterations taken for the solvers to converge.\n", " | \n", " | multi_class : str, {'ovr', 'multinomial', 'auto'}, default: 'ovr'\n", " | If the option chosen is 'ovr', then a binary problem is fit for each\n", " | label. For 'multinomial' the loss minimised is the multinomial loss fit\n", " | across the entire probability distribution, *even when the data is\n", " | binary*. 'multinomial' is unavailable when solver='liblinear'.\n", " | 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n", " | and otherwise selects 'multinomial'.\n", " | \n", " | .. versionadded:: 0.18\n", " | Stochastic Average Gradient descent solver for 'multinomial' case.\n", " | .. 
versionchanged:: 0.20\n", " | Default will change from 'ovr' to 'auto' in 0.22.\n", " | \n", " | verbose : int, default: 0\n", " | For the liblinear and lbfgs solvers set verbose to any positive\n", " | number for verbosity.\n", " | \n", " | warm_start : bool, default: False\n", " | When set to True, reuse the solution of the previous call to fit as\n", " | initialization, otherwise, just erase the previous solution.\n", " | Useless for liblinear solver. See :term:`the Glossary `.\n", " | \n", " | .. versionadded:: 0.17\n", " | *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n", " | \n", " | n_jobs : int or None, optional (default=None)\n", " | Number of CPU cores used when parallelizing over classes if\n", " | multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n", " | set to 'liblinear' regardless of whether 'multi_class' is specified or\n", " | not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n", " | context. ``-1`` means using all processors.\n", " | See :term:`Glossary ` for more details.\n", " | \n", " | Attributes\n", " | ----------\n", " | \n", " | classes_ : array, shape (n_classes, )\n", " | A list of class labels known to the classifier.\n", " | \n", " | coef_ : array, shape (1, n_features) or (n_classes, n_features)\n", " | Coefficient of the features in the decision function.\n", " | \n", " | `coef_` is of shape (1, n_features) when the given problem is binary.\n", " | In particular, when `multi_class='multinomial'`, `coef_` corresponds\n", " | to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n", " | \n", " | intercept_ : array, shape (1,) or (n_classes,)\n", " | Intercept (a.k.a. 
bias) added to the decision function.\n", " | \n", " | If `fit_intercept` is set to False, the intercept is set to zero.\n", " | `intercept_` is of shape (1,) when the given problem is binary.\n", " | In particular, when `multi_class='multinomial'`, `intercept_`\n", " | corresponds to outcome 1 (True) and `-intercept_` corresponds to\n", " | outcome 0 (False).\n", " | \n", " | n_iter_ : array, shape (n_classes,) or (1, )\n", " | Actual number of iterations for all classes. If binary or multinomial,\n", " | it returns only 1 element. For liblinear solver, only the maximum\n", " | number of iteration across all classes is given.\n", " | \n", " | .. versionchanged:: 0.20\n", " | \n", " | In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n", " | ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n", " | \n", " | Examples\n", " | --------\n", " | >>> from sklearn.datasets import load_iris\n", " | >>> from sklearn.linear_model import LogisticRegression\n", " | >>> X, y = load_iris(return_X_y=True)\n", " | >>> clf = LogisticRegression(random_state=0, solver='lbfgs',\n", " | ... multi_class='multinomial').fit(X, y)\n", " | >>> clf.predict(X[:2, :])\n", " | array([0, 0])\n", " | >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS\n", " | array([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n", " | [9.7...e-01, 2.8...e-02, ...e-08]])\n", " | >>> clf.score(X, y)\n", " | 0.97...\n", " | \n", " | See also\n", " | --------\n", " | SGDClassifier : incrementally trained logistic regression (when given\n", " | the parameter ``loss=\"log\"``).\n", " | LogisticRegressionCV : Logistic regression with built-in cross validation\n", " | \n", " | Notes\n", " | -----\n", " | The underlying C implementation uses a random number generator to\n", " | select features when fitting the model. It is thus not uncommon,\n", " | to have slightly different results for the same input data. 
If\n", " | that happens, try with a smaller tol parameter.\n", " | \n", " | Predict output may not match that of standalone liblinear in certain\n", " | cases. See :ref:`differences from liblinear `\n", " | in the narrative documentation.\n", " | \n", " | References\n", " | ----------\n", " | \n", " | LIBLINEAR -- A Library for Large Linear Classification\n", " | https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n", " | \n", " | SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n", " | Minimizing Finite Sums with the Stochastic Average Gradient\n", " | https://hal.inria.fr/hal-00860051/document\n", " | \n", " | SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n", " | SAGA: A Fast Incremental Gradient Method With Support\n", " | for Non-Strongly Convex Composite Objectives\n", " | https://arxiv.org/abs/1407.0202\n", " | \n", " | Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent\n", " | methods for logistic regression and maximum entropy models.\n", " | Machine Learning 85(1-2):41-75.\n", " | https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n", " | \n", " | Method resolution order:\n", " | LogisticRegression\n", " | sklearn.base.BaseEstimator\n", " | sklearn.linear_model.base.LinearClassifierMixin\n", " | sklearn.base.ClassifierMixin\n", " | sklearn.linear_model.base.SparseCoefMixin\n", " | builtins.object\n", " | \n", " | Methods defined here:\n", " | \n", " | __init__(self, penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='warn', max_iter=100, multi_class='warn', verbose=0, warm_start=False, n_jobs=None)\n", " | Initialize self. 
See help(type(self)) for accurate signature.\n", " | \n", " | fit(self, X, y, sample_weight=None)\n", " | Fit the model according to the given training data.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : {array-like, sparse matrix}, shape (n_samples, n_features)\n", " | Training vector, where n_samples is the number of samples and\n", " | n_features is the number of features.\n", " | \n", " | y : array-like, shape (n_samples,)\n", " | Target vector relative to X.\n", " | \n", " | sample_weight : array-like, shape (n_samples,) optional\n", " | Array of weights that are assigned to individual samples.\n", " | If not provided, then each sample is given unit weight.\n", " | \n", " | .. versionadded:: 0.17\n", " | *sample_weight* support to LogisticRegression.\n", " | \n", " | Returns\n", " | -------\n", " | self : object\n", " | \n", " | predict_log_proba(self, X)\n", " | Log of probability estimates.\n", " | \n", " | The returned estimates for all classes are ordered by the\n", " | label of classes.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array-like, shape = [n_samples, n_features]\n", " | \n", " | Returns\n", " | -------\n", " | T : array-like, shape = [n_samples, n_classes]\n", " | Returns the log-probability of the sample for each class in the\n", " | model, where classes are ordered as they are in ``self.classes_``.\n", " | \n", " | predict_proba(self, X)\n", " | Probability estimates.\n", " | \n", " | The returned estimates for all classes are ordered by the\n", " | label of classes.\n", " | \n", " | For a multi_class problem, if multi_class is set to be \"multinomial\"\n", " | the softmax function is used to find the predicted probability of\n", " | each class.\n", " | Else use a one-vs-rest approach, i.e calculate the probability\n", " | of each class assuming it to be positive using the logistic function.\n", " | and normalize these values across all the classes.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : 
array-like, shape = [n_samples, n_features]\n", " | \n", " | Returns\n", " | -------\n", " | T : array-like, shape = [n_samples, n_classes]\n", " | Returns the probability of the sample for each class in the model,\n", " | where classes are ordered as they are in ``self.classes_``.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from sklearn.base.BaseEstimator:\n", " | \n", " | __getstate__(self)\n", " | \n", " | __repr__(self)\n", " | Return repr(self).\n", " | \n", " | __setstate__(self, state)\n", " | \n", " | get_params(self, deep=True)\n", " | Get parameters for this estimator.\n", " | \n", " | Parameters\n", " | ----------\n", " | deep : boolean, optional\n", " | If True, will return the parameters for this estimator and\n", " | contained subobjects that are estimators.\n", " | \n", " | Returns\n", " | -------\n", " | params : mapping of string to any\n", " | Parameter names mapped to their values.\n", " | \n", " | set_params(self, **params)\n", " | Set the parameters of this estimator.\n", " | \n", " | The method works on simple estimators as well as on nested objects\n", " | (such as pipelines). 
The latter have parameters of the form\n", " | ``__`` so that it's possible to update each\n", " | component of a nested object.\n", " | \n", " | Returns\n", " | -------\n", " | self\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data descriptors inherited from sklearn.base.BaseEstimator:\n", " | \n", " | __dict__\n", " | dictionary for instance variables (if defined)\n", " | \n", " | __weakref__\n", " | list of weak references to the object (if defined)\n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from sklearn.linear_model.base.LinearClassifierMixin:\n", " | \n", " | decision_function(self, X)\n", " | Predict confidence scores for samples.\n", " | \n", " | The confidence score for a sample is the signed distance of that\n", " | sample to the hyperplane.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array_like or sparse matrix, shape (n_samples, n_features)\n", " | Samples.\n", " | \n", " | Returns\n", " | -------\n", " | array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n", " | Confidence scores per (sample, class) combination. 
In the binary\n", " | case, confidence score for self.classes_[1] where >0 means this\n", " | class would be predicted.\n", " | \n", " | predict(self, X)\n", " | Predict class labels for samples in X.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array_like or sparse matrix, shape (n_samples, n_features)\n", " | Samples.\n", " | \n", " | Returns\n", " | -------\n", " | C : array, shape [n_samples]\n", " | Predicted class label per sample.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from sklearn.base.ClassifierMixin:\n", " | \n", " | score(self, X, y, sample_weight=None)\n", " | Returns the mean accuracy on the given test data and labels.\n", " | \n", " | In multi-label classification, this is the subset accuracy\n", " | which is a harsh metric since you require for each sample that\n", " | each label set be correctly predicted.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array-like, shape = (n_samples, n_features)\n", " | Test samples.\n", " | \n", " | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", " | True labels for X.\n", " | \n", " | sample_weight : array-like, shape = [n_samples], optional\n", " | Sample weights.\n", " | \n", " | Returns\n", " | -------\n", " | score : float\n", " | Mean accuracy of self.predict(X) wrt. y.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from sklearn.linear_model.base.SparseCoefMixin:\n", " | \n", " | densify(self)\n", " | Convert coefficient matrix to dense array format.\n", " | \n", " | Converts the ``coef_`` member (back) to a numpy.ndarray. 
This is the\n", " | default format of ``coef_`` and is required for fitting, so calling\n", " | this method is only required on models that have previously been\n", " | sparsified; otherwise, it is a no-op.\n", " | \n", " | Returns\n", " | -------\n", " | self : estimator\n", " | \n", " | sparsify(self)\n", " | Convert coefficient matrix to sparse format.\n", " | \n", " | Converts the ``coef_`` member to a scipy.sparse matrix, which for\n", " | L1-regularized models can be much more memory- and storage-efficient\n", " | than the usual numpy.ndarray representation.\n", " | \n", " | The ``intercept_`` member is not converted.\n", " | \n", " | Notes\n", " | -----\n", " | For non-sparse models, i.e. when there are not many zeros in ``coef_``,\n", " | this may actually *increase* memory usage, so use this method with\n", " | care. A rule of thumb is that the number of zero elements, which can\n", " | be computed with ``(coef_ == 0).sum()``, must be more than 50% for this\n", " | to provide significant benefits.\n", " | \n", " | After calling this method, further fitting with the partial_fit\n", " | method (if any) will not work until you call densify.\n", " | \n", " | Returns\n", " | -------\n", " | self : estimator\n", "\n" ] } ], "source": [ "help(LogisticRegression)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# Definimos el modelo\n", "logreg=LogisticRegression(random_state=0, solver='lbfgs') #Se puede fijar un solver" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 1.0,\n", " 'class_weight': None,\n", " 'dual': False,\n", " 'fit_intercept': True,\n", " 'intercept_scaling': 1,\n", " 'max_iter': 100,\n", " 'multi_class': 'warn',\n", " 'n_jobs': None,\n", " 'penalty': 'l2',\n", " 'random_state': 0,\n", " 'solver': 'lbfgs',\n", " 'tol': 0.0001,\n", " 'verbose': 0,\n", " 'warm_start': False}" ] }, "execution_count": 37, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "# parámetros del modelo\n", "logreg.get_params()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# Definimos X e y\n", "X = dt.drop('Survived',axis=1) # Para que me de un dataframe y no una serie\n", "y=dt['Survived']" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# Ajustamos\n", "logreg = logreg.fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Coeficientes del modelo: $\\beta$ = `coef_`, $\\alpha$ = `intercept_`.\n", "\n", "$$p=\\frac{1}{1+e^{-(\\alpha+\\beta x)}}$$" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "alpha: [1.01628767]\n", "beta: [[-2.44597988]]\n" ] } ], "source": [ "print('alpha: ', logreg.intercept_)\n", "print('beta: ', logreg.coef_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Odds de la mujer: $e^\\alpha$." ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.76291884])" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.exp(logreg.intercept_)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.876543209876542" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "odds_mujer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Odds del hombre: $e^{\\alpha+\\beta}$" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.23938259]])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.exp(logreg.intercept_+logreg.coef_)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.23290598290598288" ] }, 
"execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "odds_hombre" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Odds ratio: $e^\\beta$" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.0866412]])" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.exp(logreg.coef_)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.08096731594585674" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "odds_ratio" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# Predecimos\n", "y_pred = logreg.predict(X)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hagamos la confusion matrix:" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[468, 109],\n", " [ 81, 233]])" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "confusion_matrix(y_pred,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "La precisión del modelo la podemos medir con su accuracy. 
Se puede obtener con `.score`, que evalúa unos datos sobre un modelo y calcula la accuracy, o con `accuracy_score`, que calcula la accuracy entre un vector de `y` reales y otro de predichas" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7867564534231201" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logreg.score(X,y)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7867564534231201" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import accuracy_score\n", "accuracy_score(y,y_pred)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hagamos otro ejemplo con más variables:" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedSexAgePclass
00male22.03
11female38.01
21female26.03
31female35.01
40male35.03
\n", "
" ], "text/plain": [ " Survived Sex Age Pclass\n", "0 0 male 22.0 3\n", "1 1 female 38.0 1\n", "2 1 female 26.0 3\n", "3 1 female 35.0 1\n", "4 0 male 35.0 3" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt=titanic[['Survived','Sex','Age','Pclass']]\n", "dt.head()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 4 columns):\n", "Survived 891 non-null int64\n", "Sex 891 non-null object\n", "Age 714 non-null float64\n", "Pclass 891 non-null int64\n", "dtypes: float64(1), int64(2), object(1)\n", "memory usage: 27.9+ KB\n" ] } ], "source": [ "dt.info()" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 4 columns):\n", "Survived 891 non-null int64\n", "Sex 891 non-null object\n", "Age 714 non-null float64\n", "Pclass 891 non-null object\n", "dtypes: float64(1), int64(1), object(2)\n", "memory usage: 27.9+ KB\n" ] } ], "source": [ "# Me voy a transformar Pclass en string porque es una variable categórica\n", "dt.Pclass=dt.Pclass.apply(lambda x: str(x))\n", "dt.info()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedAgeSex_femaleSex_malePclass_1Pclass_2Pclass_3
0022.001001
1138.010100
2126.010001
3135.010100
4035.001001
\n", "
" ], "text/plain": [ " Survived Age Sex_female Sex_male Pclass_1 Pclass_2 Pclass_3\n", "0 0 22.0 0 1 0 0 1\n", "1 1 38.0 1 0 1 0 0\n", "2 1 26.0 1 0 0 0 1\n", "3 1 35.0 1 0 1 0 0\n", "4 0 35.0 0 1 0 0 1" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Me saco los dummies de las variables categóricas\n", "dt=pd.get_dummies(dt)\n", "dt.head()" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedAgeSex_malePclass_1Pclass_2
0022.0100
1138.0010
2126.0000
3135.0010
4035.0100
\n", "
" ], "text/plain": [ " Survived Age Sex_male Pclass_1 Pclass_2\n", "0 0 22.0 1 0 0\n", "1 1 38.0 0 1 0\n", "2 1 26.0 0 0 0\n", "3 1 35.0 0 1 0\n", "4 0 35.0 1 0 0" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Filtramos las clases que sobran\n", "dt=dt.drop('Pclass_3',axis=1)\n", "dt=dt.drop('Sex_female',axis=1)\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Voy a tratar con los NaN" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Survived 0\n", "Age 177\n", "Sex_male 0\n", "Pclass_1 0\n", "Pclass_2 0\n", "dtype: int64" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sum(dt.isnull())" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "# Me quito los NaN\n", "dt=dt.dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hacemos el modelo:" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "# Seleccionamos X e y\n", "X=dt.drop('Survived', axis=1)\n", "y=dt['Survived']" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "# Definir:\n", "logreg= LogisticRegression(random_state=0,solver='lbfgs')\n", "\n", "# Ajustar:\n", "logreg=logreg.fit(X,y)\n", "\n", "# Predecimos\n", "y_pred=logreg.predict(X)" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1,\n", " 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,\n", " 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,\n", " 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,\n", " 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n", " 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,\n", " 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,\n", " 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", " 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,\n", " 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,\n", " 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1,\n", " 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,\n", " 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,\n", " 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,\n", " 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,\n", " 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", " 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n", " 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,\n", " 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,\n", " 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,\n", " 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,\n", " 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n", " 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,\n", " 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,\n", " 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", " 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,\n", " 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,\n", " 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1,\n", " 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n", " 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1,\n", " 1, 0, 1, 0, 0, 0, 0, 1, 1, 0])" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"y_pred" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Me ha predicho la variable target como unos y ceros, pero me puede interesar ver cómo lo predice como probabilidades. (Básicamente fija un umbral de 0,5 y devuelve 1 si la probabilidad es mayor que el umbral y 0 en caso contrario)." ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.88081218, 0.11918782],\n", " [0.10117928, 0.89882072],\n", " [0.43713259, 0.56286741],\n", " ...,\n", " [0.05569741, 0.94430259],\n", " [0.44933092, 0.55066908],\n", " [0.91216125, 0.08783875]])" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "probs=logreg.predict_proba(X)\n", "probs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Podemos sacar la accuracy:" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7899159663865546" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logreg.score(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Coeficientes:" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([[-0.03401702, -2.38901944, 2.33958631, 1.12651064]]),\n", " array([1.13725005]))" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logreg.coef_, logreg.intercept_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hay varios coeficientes ($\\beta_1, \\beta_2, \\beta_3, \\beta_4$), uno por cada variable y el otro es el $\\alpha$." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Por último, hallemos la ROC curve:" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import roc_curve, auc" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "fpr, tpr, threshold = roc_curve(y,probs[:,1])\n", "roc_auc = auc(fpr,tpr)" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "t=np.arange(0,5,0.2)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8TfX6wPHPYxZCNBqiqFCSzjVUQqOUdBtQEk1u86QuDbfkur/KrVQ3Dag0UomilCZSSihDhpShOFIkZDpyeH5/POs423HOPvscZ5+19z7P+/XaL3uvtfZez1722c9e3+/6Pl9RVZxzzrm8lAo7AOecc4nNE4VzzrmoPFE455yLyhOFc865qDxROOeci8oThXPOuag8UbiYiUh3Efkw7DgSiYhsEpHDQthvPRFRESlT3PuOBxGZLyLtCvE8/0wWA08USUpEfhKRrcEX1a8iMkJEKsdzn6r6qqqeEc99RBKRE0TkUxHZKCIbRGS8iDQurv3nEs9kEbkqcpmqVlbVpXHa3xEi8qaI/B68/7kicpuIlI7H/gorSFgN9uY1VLWJqk7OZz97JMfi/kyWVJ4oklsnVa0MNAOOA+4MOZ5Cye1XsYi0Bj4E3gEOAeoDc4Cp8fgFn2i/zEXkcOBrYAVwjKpWBS4C0oAqRbyv0N57oh13lwdV9VsS3oCfgNMiHg8C3ot4XB54GFgO/AY8A1SMWN8ZmA38CSwBOgTLqwLPAauAlcBAoHSwrhfwRXD/GeDhHDG9A9wW3D8EeAtYAywDborYrj8wGngl2P9Vuby/z4Gncln+PvBScL8dkA7cBfweHJPusRyDiOf2BX4FXgaqA+8GMa8L7tcOtv8PsAPIADYBTwbLFWgQ3B8BDAHeAzZiX/SHR8RzBrAI2AA8BXyW23sPtn0l8v8zl/X1gn33DN7f78DdEetbAF8B64P/yyeBchHrFbge+BFYFix7HEtMfwLfAG0iti8dHOclwXv7BqgDTAlea3NwXLoG25+Dfb7WA18CTXN8dvsCc4FtQBkiPs9B7DODOH4DHg2WLw/2tSm4tSbiMxls0wT4CPgjeO5dYf+tpsIt9AD8Vsj/uN3/sGoD3wGPR6x/DBgH7If9Ah0PPBCsaxF8WZ2OnVXWAo4K1r0NPAtUAg4ApgP/CNbt+qMETg6+VCR4XB3YiiWIUsEXyb1AOeAwYClwZrBtf2A7cF6wbcUc720f7Eu5fS7
v+3JgVXC/HZAJPIolhbbBF9aRMRyDrOc+FDy3IlADuCDYfxXgTeDtiH1PJscXO3smij+C41sGeBUYFayrGXzxnR+suzk4Bnklil+By6P8/9cL9j0siP1Y7Eu3UbD+eKBVsK96wELglhxxfxQcm6zkeWlwDMoAfYIYKgTr7sA+Y0cCEuyvRs5jEDxuDqwGWmIJpif2eS0f8dmdjSWaihHLsj7PXwE9gvuVgVY53nOZiH31IvszWQVLin2ACsHjlmH/rabCLfQA/FbI/zj7w9qE/bpT4BOgWrBOsC/MyF+zrcn+5fgsMDiX1zww+LKJPPO4GJgU3I/8oxTsF97JweOrgU+D+y2B5Tle+07gheB+f2BKlPdWO3hPR+WyrgOwPbjfDvuyrxSx/g3gXzEcg3bAX1lfhHnE0QxYF/F4MvkniuER6zoC3wf3LwO+ilgnWKLNK1FsJzjLy2N91pdm7Yhl04FueWx/CzA2R9yn5PMZWwccG9xfBHTOY7ucieJp4N85tlkEtI347F6Ry+c5K1FMAe4HaubxnvNKFBcDs+L5d1dSb94+mNzOU9WPRaQt8Br2q3U9sD/2q/gbEcnaVrBfd2C/5Cbk8nqHAmWBVRHPK4V9oe1GVVVERmF/nFOAS7DmkqzXOURE1kc8pTTWnJRlj9eMsA7YCRwMfJ9j3cFYM8uubVV1c8Tjn7GzmvyOAcAaVc3YtVJkH2AwloyqB4uriEhpVd0RJd5Iv0bc34L9IiaIadd7Do5fepTXWYu910LtT0SOwM600rDjUAY7y4u02/+BiPQBrgpiVWBf7DMF9plZEkM8YP//PUXkxohl5YLXzXXfOVwJDAC+F5FlwP2q+m4M+y1IjK4AvDM7BajqZ9iv2YeDRb9jzUBNVLVacKuq1vEN9kd6eC4vtQI7o6gZ8bx9VbVJHrseCVwoIodiZxFvRbzOsojXqKaqVVS1Y2TYUd7PZqz54aJcVnfBzp6yVBeRShGP6wK/xHAMcouhD9a00lJV98Wa18ASTNSYY7AKO1OyF7TsVTvvzfkYawYrrKexJNsweC93kf0+sux6PyLSBus36AJUV9VqWPNk1nPy+szkZgXwnxz///uo6sjc9p2Tqv6oqhdjTZ8PAaOD/+P8jn9BYnQF4IkidTwGnC4izVR1J9Z2PVhEDgAQkVoicmaw7XPA5SJyqoiUCtYdpaqrsCuNHhGRfYN1hwdnLHtQ1VlYx+9wYKKqZp1BTAf+FJG+IlJRREqLyNEi8rcCvJ9+2K/Sm0SkiohUF5GBWPPR/Tm2vV9EygVfducAb8ZwDHJTBUsu60VkP+C+HOt/w/pbCuM94BgROS+40ud64KAo298HnCAi/xWRg4L4G4jIKyJSLYb9VcH6RDaJyFHAtTFsn4n9f5YRkXuxM4osw4F/i0hDMU1FpEawLudxGQZcIyItg20ricjZIhLT1VoicqmI7B/8H2Z9pnYEse0k7/+Dd4GDROQWESkffG5axrJPF50nihShqmuAl7D2ebBfh4uBaSLyJ/YL9chg2+lYp/Bg7FfjZ1hzAVhbejlgAdYENJroTSAjgdOwpq+sWHYAnbA2/mXYr/vh2BVVsb6fL4Azsc7fVViT0nHASar6Y8SmvwZx/oJ1Hl+jqlnNVXkegzw8hnUM/w5MAz7Isf5x7AxqnYg8Eet7Cd7P79gZ0iCsWakxdmXPtjy2X4IlxXrAfBHZgJ2xzcT6pfJzO9YcuBH74n49n+0nYleU/YAd6wx2bx56FOv/+RBLQM9hxwqsz+lFEVkvIl1UdSbWZ/Uk9n+zGOtLiFUH7D1vwo55N1XNUNUt2NVnU4N9tYp8kqpuxC7Q6IR9Ln4E2hdgvy4PWVesOJd0gpG8r6hqtCachCQipbDLc7ur6qSw43EuGj+jcK6YiMiZIlJNRMqT3WcwLeSwnMtX3BKFiDwvIqtFZF4e60VEnhCRxUFpgubxisW5BNEauyrnd6x55DxV3RpuSM7lL25NTyJyMnad/0u
qenQu6zsCN2LXmrfEBot5x5NzziWYuJ1RqOoUbJRqXjpjSURVdRpQTURiuW7cOedcMQpzwF0tdr+qIj1YtirnhiLSG+gNUKlSpeOPOuqoYgnQOecSybZtsH49/PIL7NwZ23Pq8jPVWM9cMn9X1f0Ls98wE0XOwT+Qx4AaVR0KDAVIS0vTmTNnxjMu55xDFX7+GTIzi/61166F8ePh3Xfh99/z3x5gxw74NRiHf+KJMGwYVMlrZEpWl4IIlV56mlJrV1Pt0f4/FzbeMBNFOjbkPktt7Fp455wL1Z9/Qs+e8Pbb8dtHqVLQpg2kpcX+nMaN4bzz4LBowz5XroTrroWuXaF7d7grGGv5aP9CxxpmohgH3BDUC2oJbAhGBjvnXGgWLoS//x0WL4Z774WGDYt+HxUqQLt2ULNmvpvGThWGD4fbb4ft2+Hss4vspeOWKERkJFahs2ZQ/Ow+rOAcqvoMVpSuIzZqcws2Utg550Izdixcdhnssw988gm0zbV4TQJasgSuvhomTYL27a1d6vCiK3sVt0QRFPWKtl6xejfOOReqLVvg3/+GBx+EFi3grbegdjKN9//uO/jmGxg6FK66CiS3LuDC8zLjzrkSIzMTVq2C5cuto3r5cvjpJxgzBtassR/l//sflC8fdqQxmDcPvv3WToHOOw+WLoUaNfJ/XiF4onDOpTRVa1Lq3x8WLLCrhyLVqAGtW8M//2mdywnvr7/g//7PbgceCF26WKdHnJIEeKJwzqWwWbPg1lvhs8+gSRPo2xcOPRTq1rV/69SBypXzf52E8fXXcOWVMH8+XHopDB5sSSLOPFE451JSv34waJD90H76aWu6L5PM33grV9opz4EH2gCMIryqKT/JfNiccyXQ66/bmcH27dG3++UX65ieOBGqxTLVU6L64Qc44gioVcve/Kmnwr775v+8IuSJwjmXNBYvtpaXww+3JBBN6dJw881JnCTWr7eOk+HDYfJkOPlkG+ARAk8UzrmEtXOn/aCePh1mzIA33oCyZa3lpU6d/J+ftMaNg2uvtZodd9wBfyvILMJFzxOFcy4hbNhgl64uWmSJISs5bNhg6ytXtuTw0EMpniSuugqeew6OOQbeeadgNT7ixBOFc67YbdsGc+ZkJ4Tp0y1BZClTBpo2hYsvtiamFi3gqKOsOSklRRTxIy3NLsnq2xfKlQs3roAnCudckVO1qqgrV+55mzsXZs/O7ow+8EBo2RJ69IB69azgXbNmULFiqG+h+KxYAddcA9262UG45pqwI9qDJwrn3B62brUv+rVr8/438v5ff2U/V3XPZWA/lg88EI480sY2ZJ0p1K5d5BUnksPOnfDss3bmsGNHaB3VsfBE4VyK27zZLpr5JaKI/9at0ZPA1igzeVetamMTataEAw6ARo32HPO13352NWfk7aCDrCPaAT/+aH0RU6bAaadZjab69cOOKk+eKJxLMarw/ffw/vt2mzJlz1/3YL/iq1e3L/waNeyXfbNm2Ukgt3/328+/7IvEggXWBvf889CrV8KfUnmicC7Jbd8Oq1db8dCs5PBzMJdZo0Zwww1w1lnWGZz1fVS+vCWJlO0cTkRz5ljnTM+e0LmzFfGrXj3sqGLiicK5BKMKv/1mUwwsW2bNQNu3W3XT1att3erV2bd167KfW7myDdy9807o0MEunnEh27YNBg60GuYHH2wzz1WokDRJAjxROBc3O3bYl3rOq35Wrcq9KWjzZksOS5fa/Ai5qVHD+gUOOMAuH826f+CBVuXhxBMT5opKB/DVVzaUfOFCKwf+6KPFUsSvqHmicG4v9e4N7723+7IdO6xjOGdJ69KlrVM3t0s/K1Sw/szTTrMSFYcfbo+rVLFxBd4/kGRWrrQp8g46CCZMsPa/JOWJwrkC+vZbmDkz+/Hbb9uX+SmnZC8Tgf333/PKnwMO8H6BlLdwoXUO1aplNUdOPdU+IEnME4VzBfDWWzZaOGfl0ssug4cfDicmlyDWrYM+feC
FF+xSszZtbOa5FOCJwrkoVqywzuStW638zpAh0KoVvPzy7k3NBx8cXowuAYwdC9ddZ1cc3Hln6EX8iponCucibN9uPwaXLLFBaiNHZq8rUwYuvxweeyzJZkVz8XXFFXYW0ayZdVY1bx52REXOE4Ur8T75xM4WMjIsOURebgrWpHTIIXZFUd26oYToEk1kEb9WraBhQ7j99pS92sAThUtpq1bBxo3Rt3nsMfjgA7vKqFMnK7mTlgalSkGlSlaywrldfv4Z/vEPuOQS65zq3TvsiOLOE4VLKaqwfLldmfTCCzB+fGzPO+YYq6jgXJ527rTJt/v1sw/aRReFHVGx8UThUsKHH9rA19mzs5uOatSAe++1aqX5ado0vvG5JLdokRXx++ILOOMMq/par17YURUbTxQuaf3+u12F9OKLVuoC4Oqr4bjj7HbssSVoTgMXX4sWwfz5MGKENTcleBG/ouaJwhWb336zpqCdO/fudTZutMJ3kyfbyOcOHWw0c7t21mzsXJGYNctOUS+/HM4912qrVKsWdlSh8EThitymTdbk8+uvuy+PvNR0bx15pM05f+ml0KRJ0b2uc2RkwIABMGiQja6++GIbNFNCkwR4onBRqMJPP1lds8GDrXRNLLZssWRx2GG7L8+qX/TCC3sXV5kyVgrDuSI3daoV8Vu0yM4kHnkkKYv4FTVPFCXchg02qDTnVJYLFtjyFStsWePGcM45sb/uhRdan59zSWPlSmjf3s4iJk70D3AETxQl1NCh1sb/7bf24ymnChXs7+Suu6BBAyt4V6pUsYfpXPwtWGC/hGrVsmJe7dv70PscPFGUMHPnwr//DaNHQ506NnfKO+/YALNI1av7FUMuxf3xB9x2m10299lncPLJNuLS7cETRQkye7bVKqtYEe67zzqc/SzBlUhvvQXXXw9r18Ldd0OLFmFHlNA8UZQg//qXlcX//nvvDHYlWK9edhbRvLnVbmnWLOyIEp4nihLkq6+sk9mThCtxIov4nXCCTSzUp49dQufyFdejJCIdgMeB0sBwVX0wx/q6wItAtWCbfqo6IZ4xpbK1a+Gjj+xKptxs3erzKbsSaNkyK9x36aXQs2eJKOJX1OKWKESkNDAEOB1IB2aIyDhVXRCx2T3AG6r6tIg0BiYA9eIVUyratg1uucVmX/ziiz3naM7pkEOKJy7nQrdjh9V4ufNO64zr3j3siJJWPM8oWgCLVXUpgIiMAjoDkYlCgX2D+1WBX+IYT0p55hkbPLpqVfayu+6Czp3taqbciMCBBxZPfM6FauFCGzj31Vdw1ln2B+OTiRRaPBNFLWBFxON0oGWObfoDH4rIjUAl4LTcXkhEegO9AeqW8P/sLVvg1VfhhhusdEWnTrDPPnYFU/XqYUfnXIJYvNgGCL38sp1JlLAifkUtnokit/8ZzfH4YmCEqj4iIq2Bl0XkaFXdrWycqg4FhgKkpaXlfI2Upgr//Cekp1sxvU8/taqpZcvCK6/YPArOOeCbb2DOHJuatFMn65vYd9/8n+fyFc+r6NOByEaQ2uzZtHQl8AaAqn4FVABqxjGmpPPjjzYV5yef2DiIE06wsUFr1niScA6wqzT69YOWLW00aUaGLfckUWTimShmAA1FpL6IlAO6AeNybLMcOBVARBphiWJNHGNKGn/8YeOBsibdefBBO5N+5x0bQOrTczoHTJliE4889JCNj5g1y4v4xUHcmp5UNVNEbgAmYpe+Pq+q80VkADBTVccBfYBhInIr1izVS1VLVNNSTvPn22yLQ4bY4x49rBR+x47hxuVcwlm5Ek491a7e+Phju+/iQpLtezktLU1nzpwZdhhx8eSTcOONVmKjVi0rxvfee15mw7ndfPdddrvru+9aEb9KlcKNKQmIyDeqmpb/lnvyr6AEMmuWzfO8fLn1Tbz/vicJ53b5/Xc7xW7a1JqcwGrfe5KIOx+/nmAqVoSa3p3vXDZVePNNuyZ83TqraNky55X2Lp48UTjnElv
PnjYeIi3NLv/zy/2KnScK51ziiSzi17atNTfdcosX8QuJt4AnCFUbJ1GC5293zixdCqedBiNG2OMrr4Tbb/ckESJPFAli7FiblrRPn7AjcS4kO3bAY49Z09KMGX4lRwLxFJ0APvoIrroKjjrKKiE7V+IsWGClN77+Gs4+24r41a4ddlQu4IkiAXTqZOXCX3jBz65dCbVsGSxZAq+9Bt26eRG/BONfSwlg2za45horEe5ciTFjhnXMXX21nUUsXWpz9bqE442ACaBMGdhvv7CjcK6YbNlindOtWsEDD2QX8fMkkbA8UTjnis/kyXap6yOP2JmEF/FLCt70FLK+fSEz05tkXQmQng6nnw6HHmoTq7RvH3ZELkZ+RhEiVZg0ye77dL4uZc2ZY//Wrm118ufO9SSRZDxRFLPMTOjf3y7sSEuz/rzu3aFRo7Ajc66IrVkDl1wCzZrZbFtg9fL32SfcuFyBedNTMfjjD1i92s4g7rkHxoyBhg1t8qFnn7VSNs6lDFUYNQpuugk2bID774fWrcOOyu2FmBJFMENdXVVdHOd4Usrq1Vbo8oUX7BLYLI89BjffHF5czsVVjx7w6qtW4fW556BJk7Ajcnsp30QhImcDjwLlgPoi0gy4T1X/Hu/gktny5Vau5uef7YyhfXvrsK5Xz64KdC6l7NxpH3AR+7Aff7ydUZQuHXZkrgjEckYxAGgJTAJQ1dki0iCuUSWxjAx4/nkYONAuF580CU44IeyonIujxYvtUtcePawMx5VXhh2RK2KxdGZvV9X1OZYl1/ypxeimm+D66+3MYfJkTxIuhWVmwsMPWxG/WbOgXLmwI3JxEssZxUIR6QKUEpH6wM3AtPiGlZy2bIGRI62p6YUXfGyES2Hz5sHll8PMmVZ75qmn4JBDwo7KxUksZxQ3AMcDO4ExQAaWLFyE006DOnVg0yY7A/ck4VLa8uXWATdqlNXI9ySR0mI5ozhTVfsCfbMWiMj5WNJwgazKBDfeCO3ahR2Nc3Hw9dc2eK53bxsPsXQpVK4cdlSuGMRyRnFPLsvuLupAUkHHjjaYzi/0cCll82a47TYbCzFoUPa13p4kSow8zyhE5EygA1BLRB6NWLUv1gzlnEt1n35qVzQtXQrXXgsPPgjly4cdlStm0ZqeVgPzsD6J+RHLNwL94hmUcy4BpKfDmWdC/fpWguPkk8OOyIUkz0ShqrOAWSLyqqpmFGNMSWfuXJvut3r1sCNxrgjMmgXHHWdF/MaPh7ZtoWLFsKNyIYqlj6KWiIwSkbki8kPWLe6RJbi//x1q1IADD7SxElWr2lgj55LWb79B167QvHl2Eb8OHTxJuJiuehoBDAQeBs4CLqeE91H89Re8+66NN7rmGlt29tl+RuGSlKrVZrr5Zru+e+BAHynqdhNLothHVSeKyMOqugS4R0Q+j3dgiWziREsS775rCcK5pHbJJTYeonVrK+LnNe9dDrEkim0iIsASEbkGWAkcEN+wEs+6dVYefOBAqwpbowaccUbYUTlXSJFF/M44w5LE9df7td0uV7EkiluBysBNwH+AqkCJao3fssUG06Wn2xwsHTrYjI5ly4YdmXOF8MMPdsnrZZdZAb/LLw87Ipfg8k0Uqvp1cHcj0ANARGrHM6hE8+STliT694c77vAJulySysyERx+1SVIqVPBOahezqFc9icjfROQ8EakZPG4iIi9RgooCbthgY4zOOsv+vjxJuKQ0d65NhNK3r32YFyywvgnnYpBnohCRB4BXge7AByJyNzYnxRzgiOIJL1yqcPfd1j8xYEDY0Ti3F9LTYcUKePNNeOstOPjgsCNySSRa01Nn4FhV3Soi+wG/BI8XxfriItIBeBwoDQxX1Qdz2aYL0B+b42KOqibEz5ynnrIziRUr7LLXY48NOyLnCujLL+1M4pprsov4VaoUdlQuCUVLFBmquhVAVf8Qke8LmCRKA0OA04F0YIaIjFPVBRHbNATuBE5U1XUiEtrVVLffDi+
9lP14zRr7d8QI6NTJO65dEtm0yU6F//c/OPxw66wuX96ThCu0aIniMBHJKiUuQL2Ix6jq+fm8dgtgsaouBRCRUdhZyoKIba4GhqjquuA1Vxcw/iIzdar9LXXqlL3s4ouhTZuwInKuED780MqAL19ul7v+3/95ET+316IligtyPH6ygK9dC1gR8Tgdm3s70hEAIjIVa57qr6of5HwhEekN9AaoW7duAcPI36ZN1g/RuLE1OTmXlFassBGghx8OU6bASSeFHZFLEdGKAn6yl6+d2xxvOefaLgM0BNoBtYHPReTonHN0q+pQYChAWlpakc3XvXMnvPwy3HknrFplP8ScSzrffAPHH29TLE6YYKfBFSqEHZVLIbEUBSysdKBOxOPaWId4zm3eUdXtqroMWIQljribMQNatIBevezv68svbW4W55LGr7/CRRdBWlp2Eb/TT/ck4YpcPBPFDKChiNQXkXJAN2Bcjm3eBtoDBGM1jgCWxjEmwGZ0bNvW/s5eeQW++soqGDiXFFThxRetrXT8eOuH8CJ+Lo5iKeEBgIiUV9VtsW6vqpkicgMwEet/eF5V54vIAGCmqo4L1p0hIguAHcAdqrq2YG+h4MaPh4wMK7u///7x3ptzRaxbN3jjDTjxRBg+HI46KuyIXIrLN1GISAvgOazGU10RORa4SlVvzO+5qjoBmJBj2b0R9xW4LbgVq1KlPEm4JBJZxK9jR+uHuO46+yA7F2exfMqeAM4B1gKo6hyC5qJkNWuWzx3hksj339s0pM89Z4979oQbbvAk4YpNLJ+0Uqr6c45lO+IRTHGYNcsuDOnTJ+xInMvH9u3W/3DssVabqXLlsCNyJVQsfRQrguYnDUZb3wgk7VSoK4KRHaefHm4czkU1e7aNqJ49Gy680EZZH3RQ2FG5EiqWRHEt1vxUF/gN+DhYltQkt1EeziWKX3+121tvwfn5FUFwLr5iSRSZqtot7pEUg7lzoXNnu++JwiWcL76wD+l119nsWEuWeF17lxBi6aOYISITRKSniFSJe0RxdNllULMm9OsHRx8ddjTOBTZutM7pNm3gscdgW3AVuicJlyDyTRSqejgwEDge+E5E3haRpDzD+OMPK/r3wANeDdYliIkT7VfLU0/BzTfDt996ET+XcGIacKeqXwJfikh/4DFsQqNRcYxrr6jCjz/aRSOR/vornHicy9WKFXDOOdCggTU7+ehql6BiGXBXGSsP3g1oBLwDJPQn+o03bPBqbnyaYBcq1exCY3XqwPvvW5VXr8/kElgsZxTzgPHAIFX9PM7x7LWtW63QH8DQoVCt2u7r27Ur7oicC6xaZXNEjB0LkydbwbHTTgs7KufyFUuiOExVd8Y9kiKyYIHVcdpnH0sY3hfhQqdqUyXedpt9OB96yOo0OZck8kwUIvKIqvYB3hKRPeaAiGGGu1CNGuVJwiWILl1g9Gi7qmn4cDjiiLAjcq5Aop1RvB78W9CZ7UI1YEDYETgH7Nhhg3VKlbJL7U45Bf7xD6/P5JJSnp9aVZ0e3G2kqp9E3rBO7YQ0LpjxwsdJuNAsXGhnD1lF/C67DK691pOES1qxfHKvyGXZlUUdSFEpVQruuQfq1w87ElfibN8OAwdCs2awaBFUrRp2RM4ViWh9FF2xS2Lri8iYiFVVgPW5Pytcr7xiZfudK3azZtnVE3PnQteu8MQTcMABYUflXJGI1kcxHZuDojYwJGL5RmBWPIMqrCFBlG3ahBuHK4F++w1+/x3efju7oJhzKSLPRKGqy4BlWLXYhLdyJUybBmecYTfn4m7KFPjuOxsb0aEDLF7sIzpdSsqzj0JEPgv+XScif0Tc1onIH8UXYmwefdT+rVMn3DhcCfDnn1bhtW1ba2JtQDc6AAAajklEQVTKKuLnScKlqGid2VnTndYE9o+4ZT1OKNu329WIQ4eGHYlLaRMmQJMm8OyzNoDOi/i5EiDa5bFZ3cJ1gNKqugNoDfwDqFQMscUsM9Mqw1ar5lc
gujhascL6H6pWhS+/hEcegUoJ9afgXFzE8rX6NjYN6uHAS9gYitfiGlUB3HcfHHwwvPqq/etckVK1zi+wds0PP7SziJYtw43LuWIUS6LYqarbgfOBx1T1RqBWfMOK3eOPQ+3aVjH266/DjsallF9+gfPOg9at4bPPbFn79lCuXLhxOVfMYkkUmSJyEdADeDdYllBVlNq2hYsugsqVw47EpQRVq8nUuLGdQTz8sBfxcyVaLNVjrwCuw8qMLxWR+sDI+IblXIguvBDGjLFfIMOH28RCzpVg+SYKVZ0nIjcBDUTkKGCxqv4n/qE5V4wii/idd54Nxrn6ar86wjliaHoSkTbAYuA54HngBxHx83CXOubNs6alrCJ+PXp4pVfnIsTylzAY6KiqJ6rqCcDZwOPxDcu5YvDXX3D//dC8OSxZAtWrhx2Rcwkplj6Kcqq6IOuBqi4UEb/swyW3b76xIn7z5sEll8Bjj8H+CTeO1LmEEEui+FZEngVeDh53J0GLAjoXs7VrYf16GD8ezjkn7GicS2ixJIprgJuAfwICTAH+F8+gnIuLSZOsiN9NN1ln9Y8/QoUKYUflXMKLmihE5BjgcGCsqg4qnpCcK2IbNsA//2mFwI46yjqqy5f3JOFcjKJVj70LK9/RHfhIRHKb6c65xDZ+vA2cGz4cbr/d+ia8iJ9zBRLtjKI70FRVN4vI/sAE7PJY55LDihVwwQV2FvH22/C3v4UdkXNJKdrlsdtUdTOAqq7JZ1vnEoOqVXaF7CJ+M2d6knBuL0T78j9MRMYEt7HA4RGPx0R53i4i0kFEFonIYhHpF2W7C0VERSStoG/AuV3S0+Hcc23wXFYRv3btvIifc3spWtPTBTkeP1mQFxaR0thc26cD6cAMERkXOSYj2K4KdlWV1351hbNzJwwbBnfcYZOTPPoonHRS2FE5lzKizZn9yV6+dgusLtRSABEZBXQGFuTY7t/AIOD2vdyfK6kuuMD6IE45xRLGYYeFHZFzKSWe/Q61gBURj9PJMY+FiBwH1FHVd4lCRHqLyEwRmblmzZqij9Qln8xMO5MASxTDhsHHH3uScC4O4pkoJJdlumulSCmsjlSf/F5IVYeqapqqpu3vZRbc3Lk2mdCwYfb40kvhqqus+qtzrsjFnChEpKAXn6dj821nqQ38EvG4CnA0MFlEfgJaAeO8Q9vlads2m/v2+OPh55+9NpNzxSSWMuMtROQ74Mfg8bEiEksJjxlAQxGpHxQR7AaMy1qpqhtUtaaq1lPVesA04FxVnRlr8OvWwdatUDah5ttzcTFjhlV5HTAALr4YFi6E888POyrnSoRYziieAM4B1gKo6hygfX5PUtVM4AZgIrAQeENV54vIABE5t/AhZ3vkEdi+3aYPcClu3TrYtAkmTICXXoIaNcKOyLkSI5aigKVU9WfZvf13RywvrqoTsBHdkcvuzWPbdrG8ZpbRo+E//4G0NGjatCDPdEnj00+tiN/NN1sRvx9+8PIbzoUgljOKFSLSAlARKS0itwA/xDmufM2ebf8OGBBuHC4O1q+3aUhPPRWefdb6JsCThHMhiSVRXAvcBtQFfsM6na+NZ1CxKl0azjor7ChckXrnHSvi9/zzVvHVi/g5F7p8m55UdTXWEZ1QfvsNqlULOwpXpJYvh4sugkaNYNw4a1d0zoUu30QhIsOIGP+QRVV7xyWiGH33HRxzTJgRuCKhCl98AW3aQN26NmiuVSuvz+RcAoml6elj4JPgNhU4ANgWz6Dys3OnTXXsiSLJLV8OZ58NJ5+cXcTv5JM9STiXYGJpeno98rGIvAx8FLeIYvDTT7B5syeKpLVzJzzzDPTta2cUTzzhRfycS2CxXB6bU33g0KIOpCAWLbJ/GzcOMwpXaOefb53Wp59u05PWqxd2RM65KGLpo1hHdh9FKeAPIM+5JYrD9u32b8WKYUbhCiQzE0qVslvXrtC5M/Tq5fWZnEsCUROF2Ci7Y4GVwaKdqrpHx7ZzUc2
ZA1dcYWMjrrnGSnA455JG1M7sICmMVdUdwc2ThItdRgbcc49d5pqeDgcdFHZEzrlCiOWqp+ki0jzukbjUMn06HHec1Vnp3t2K+J13XthROecKIc+mJxEpExT2Owm4WkSWAJuxeSZUVT15uLz9+aeV9v3gAzjzzLCjcc7thWh9FNOB5oD/DHSx+fBDmD8fbr0VTjvNLk/z8hvOJb1oiUIAVHVJMcXiktW6dXDbbTBiBDRpAtddZwnCk4RzKSFaothfRG7La6WqPhqHeFyyGTMGrr8e1qyBO++Ee+/1BOFciomWKEoDlcl97mvnrARHt25w9NE2odBxx4UdkXMuDqIlilWq6rM9uN2pwpQp0LatFfH79FNo2dLno3UuhUW7PNbPJNzufv7ZJgBp1y67iN9JJ3mScC7FRUsUpxZbFC6x7dwJTz5pHdVffAH/+5+VBXfOlQh5Nj2p6h/FGYhLYOedB+PH23iIZ5+FQ0OtCemcK2aFqR7rSoLt222u2VKlrDbThRdCjx5exM+5EiiWEh4JZ8IE+3effcKNI2V9+y20aGFzRoAlissu8yThXAmVdIkiM9NaP664Ao48MuxoUszWrTYWokUL+PVXqFMn7Iiccwkg6ZqesurXtmjhP3CL1LRp0LMn/PCDZeGHH4bq1cOOyjmXAJIuUbg42bzZ+iU++sjqNDnnXMATRUn2wQdWxK9PHzj1VPj+eyhXLuyonHMJJun6KFwRWLvWmpnOOgtefBH++suWe5JwzuXCE0VJogqjR0PjxvDaazb73IwZniCcc1F501NJsnw5XHIJNG1qc0cce2zYETnnkoCfUaQ6VSvcBzaievJku8LJk4RzLkaeKFLZsmVwxhnWUZ1VxO+EE6CMn0g652LniSIV7dgBjz9u80R8/TU8/bQX8XPOFZr/tExFnTvDe+9Bx45WhsNHWDvn9oInilQRWcSvRw+rz3TJJT583Tm31+La9CQiHURkkYgsFpF+uay/TUQWiMhcEflERLx+dWHMnAlpadbEBNC1K3Tv7knCOVck4pYoRKQ0MAQ4C2gMXCwijXNsNgtIU9WmwGhgULziSUlbt0LfvjYV6Zo1Pk+Ecy4u4nlG0QJYrKpLVfUvYBTQOXIDVZ2kqluCh9OA2nGMJ7V89ZVd4jpokBXxW7AAzjkn7Kiccykonn0UtYAVEY/TgZZRtr8SeD+3FSLSG+gNUKtWvSIKL8lt3WpTlH78sV3+6pxzcRLPM4rcGsg11w1FLgXSgP/mtl5Vh6pqmqqm1ahRowhDTDITJsB/g0N0yimwcKEnCedc3MUzUaQDkddl1gZ+ybmRiJwG3A2cq6rb4hhP8vr9d7j0Ujj7bHj11ewifmXLhhuXc65EiGeimAE0FJH6IlIO6AaMi9xARI4DnsWSxOo4xpKcVGHUKGjUCN54A+67D6ZP9yJ+zrliFbc+ClXNFJEbgIlAaeB5VZ0vIgOAmao6Dmtqqgy8KXYp53JVPTdeMSWd5cutHPixx8Jzz8Exx4QdkXOuBIrrgDtVnQBMyLHs3oj7PpVaTqrwySc2y9yhh1qNpr/9zQbTOedcCLzWUyJZssQ6p08/PbuIX6tWniScc6HyRJEIduyARx+1pqVvvoFnn/Uifs65hOG1nhJBp07w/vs2YO7pp6G2jzt0ziUOTxRh+esvmxeiVCno1csK+XXr5vWZnHMJx5uewjB9Ohx/PDz1lD3u0sWqvXqScM4lIE8UxWnLFujTB1q3hnXr4PDDw47IOefy5U1PxeWLL2xMxNKl8I9/wEMPQdWqYUflnHP58kRRXLImFpo0Cdq1Czsa55yLmSeKeBo/3gr3/fOf0L69lQIv44fcOZdcvI8iHtassWlIzz0XRo7MLuLnScI5l4Q8URQlVXjtNSviN3o0DBgAX3/tRfycc0nNf+IWpeXL4fLL4bjjrIhfkyZhR+Scc3vNzyj21s6dMHGi3T/0UPj8c5g61ZOEcy5leKLYGz/+aDPNdegAU6bYshY
tvIifcy6leKIojMxMm5K0aVOYPduambyIn3MuRXkfRWGcc441N3XubGU4Djkk7IicS0jbt28nPT2djIyMsEMpMSpUqEDt2rUpW4RTJXuiiNW2bTZHdalScNVVcMUVcNFFXp/JuSjS09OpUqUK9erVQ/xvJe5UlbVr15Kenk79+vWL7HWTrulp48YQdjptGjRvDkOG2OMLL7RCfv7Bdy6qjIwMatSo4UmimIgINWrUKPIzuKRLFOnp9m/dusWws82b4dZb4YQTLEM1bFgMO3UutXiSKF7xON5J2fTUtSucdVacd/L551bEb9kyuO46eOAB2HffOO/UOecST9KdUQBUqlQMO8nMtD6Jzz6zJidPEs4lrbFjxyIifP/997uWTZ48mXPOOWe37Xr16sXo0aMB64jv168fDRs25Oijj6ZFixa8//77ex3LAw88QIMGDTjyyCOZmDUGK4dPPvmE5s2b06xZM0466SQWL14MwIgRI9h///1p1qwZzZo1Y/jw4XsdTyySMlHEzdtv25kDWBG/+fPh5JPDjck5t9dGjhzJSSedxKhRo2J+zr/+9S9WrVrFvHnzmDdvHuPHj2fjXnaSLliwgFGjRjF//nw++OADrrvuOnbs2LHHdtdeey2vvvoqs2fP5pJLLmHgwIG71nXt2pXZs2cze/Zsrrrqqr2KJ1ZJ2fRU5H77DW68Ed580zqt+/Sx+kxexM+5InPLLTbsqCg1awaPPRZ9m02bNjF16lQmTZrEueeeS//+/fN93S1btjBs2DCWLVtG+fLlATjwwAPp0qXLXsX7zjvv0K1bN8qXL0/9+vVp0KAB06dPp3Xr1rttJyL8+eefAGzYsIFDQr4Ev2R/E6rCK6/YJ3jTJvjPf+COO6zJyTmXEt5++206dOjAEUccwX777ce3335L8+bNoz5n8eLF1K1bl31jaHK+9dZbmTRp0h7Lu3XrRr9+/XZbtnLlSlq1arXrce3atVm5cuUezx0+fDgdO3akYsWK7LvvvkybNm3XurfeeospU6ZwxBFHMHjwYOrUqZNvjHurZCeK5cttTERamo2uPuqosCNyLmXl98s/XkaOHMktt9wC2Jf3yJEjad68eZ5XBxX0qqHBgwfHvK2qxrS/wYMHM2HCBFq2bMl///tfbrvtNoYPH06nTp24+OKLKV++PM888ww9e/bk008/LVC8hVHyEkVWEb+zzrIiflOnWrVXr8/kXMpZu3Ytn376KfPmzUNE2LFjByLCoEGDqFGjBuvWrdtt+z/++IOaNWvSoEEDli9fzsaNG6lSpUrUfRTkjKJ27dqsWLFi1+P09PQ9mpXWrFnDnDlzaNmyJWB9Eh06dACgRo0au7a7+uqr6du3bwxHoQioalLdypY9Xq+4Qgtn0SLVNm1UQXXy5EK+iHMuVgsWLAh1/88884z27t17t2Unn3yyTpkyRTMyMrRevXq7Yvzpp5+0bt26un79elVVveOOO7RXr166bds2VVX95Zdf9OWXX96reObNm6dNmzbVjIwMXbp0qdavX18zMzN322b79u1ao0YNXbRokaqqDh8+XM8///xdMWQZM2aMtmzZMtf95HbcgZlayO/dknFGkZkJjzwC990HFSvCCy/41UzOlQAjR47c41f9BRdcwGuvvUabNm145ZVXuPzyy8nIyKBs2bIMHz6cqlWrAjBw4EDuueceGjduTIUKFahUqRIDBgzYq3iaNGlCly5daNy4MWXKlGHIkCGUDlozOnbsyPDhwznkkEMYNmwYF1xwAaVKlaJ69eo8//zzADzxxBOMGzeOMmXKsN9++zFixIi9iidWorm0mSWycuXStEePmTz3XAGedOaZ8OGHcP75NibioIPiFp9zLtvChQtp1KhR2GGUOLkddxH5RlXTCvN6qXtGkZFhVy+VLg29e9vtggvCjso555JOag64mzrVLrDOKuJ3wQWeJJxzrpBSK1Fs2gQ33WSTCGVkgJ/yOhe6ZGveTnbxON6pkyg++wy
OPhqefBJuuAHmzYPTTw87KudKtAoVKrB27VpPFsVEg/koKlSoUKSvm1p9FPvsY1VfTzwx7Eicc9i4gfT0dNasWRN2KCVG1gx3RSm5E8WYMfD993DXXdC2LXz3nQ+ccy6BlC1btkhnWnPhiGvTk4h0EJFFIrJYRPrlsr68iLwerP9aROrF8rr7bvnVZpm74AIYOxb++stWeJJwzrkiF7dEISKlgSHAWUBj4GIRaZxjsyuBdaraABgMPJTf61bfuZaBYxrBu+9aSfAvv7RKr8455+IinmcULYDFqrpUVf8CRgGdc2zTGXgxuD8aOFXyqchVe8fP/FL9aJgzB/r180qvzjkXZ/Hso6gFrIh4nA60zGsbVc0UkQ1ADeD3yI1EpDfQO3i47YjfvpjnlV4BqEmOY1WC+bHI5scimx+LbEcW9onxTBS5nRnkvEYulm1Q1aHAUAARmVnYYeipxo9FNj8W2fxYZPNjkU1EZhb2ufFsekoHImfUqA38ktc2IlIGqAr8EceYnHPOFVA8E8UMoKGI1BeRckA3YFyObcYBPYP7FwKfqo/Mcc65hBK3pqegz+EGYCJQGnheVeeLyACsLvo44DngZRFZjJ1JdIvhpYfGK+Yk5Mcimx+LbH4ssvmxyFboY5F0Zcadc84Vr9Sp9eSccy4uPFE455yLKmETRbzKfySjGI7FbSKyQETmisgnInJoGHEWh/yORcR2F4qIikjKXhoZy7EQkS7BZ2O+iLxW3DEWlxj+RuqKyCQRmRX8nXQMI854E5HnRWS1iMzLY72IyBPBcZorIs1jeuHCTrYdzxvW+b0EOAwoB8wBGufY5jrgmeB+N+D1sOMO8Vi0B/YJ7l9bko9FsF0VYAowDUgLO+4QPxcNgVlA9eDxAWHHHeKxGApcG9xvDPwUdtxxOhYnA82BeXms7wi8j41hawV8HcvrJuoZRVzKfySpfI+Fqk5S1S3Bw2nYmJVUFMvnAuDfwCAgoziDK2axHIurgSGqug5AVVcXc4zFJZZjocC+wf2q7DmmKyWo6hSij0XrDLykZhpQTUQOzu91EzVR5Fb+o1Ze26hqJpBV/iPVxHIsIl2J/WJIRfkeCxE5Dqijqu8WZ2AhiOVzcQRwhIhMFZFpItKh2KIrXrEci/7ApSKSDkwAbiye0BJOQb9PgMSdj6LIyn+kgJjfp4hcCqQBbeMaUXiiHgsRKYVVIe5VXAGFKJbPRRms+akddpb5uYgcrarr4xxbcYvlWFwMjFDVR0SkNTZ+62hV3Rn/8BJKob43E/WMwst/ZIvlWCAipwF3A+eq6rZiiq245XcsqgBHA5NF5CesDXZcinZox/o38o6qblfVZcAiLHGkmliOxZXAGwCq+hVQASsYWNLE9H2SU6ImCi//kS3fYxE0tzyLJYlUbYeGfI6Fqm5Q1ZqqWk9V62H9NeeqaqGLoSWwWP5G3sYudEBEamJNUUuLNcriEcuxWA6cCiAijbBEURLnZx0HXBZc/dQK2KCqq/J7UkI2PWn8yn8knRiPxX+BysCbQX/+clU9N7Sg4yTGY1EixHgsJgJniMgCYAdwh6quDS/q+IjxWPQBhonIrVhTS69U/GEpIiOxpsaaQX/MfUBZAFV9Buuf6QgsBrYAl8f0uil4rJxzzhWhRG16cs45lyA8UTjnnIvKE4VzzrmoPFE455yLyhOFc865qDxRuIQjIjtEZHbErV6UbevlVSmzgPucHFQfnROUvDiyEK9xjYhcFtzvJSKHRKwbLiKNizjOGSLSLIbn3CIi++ztvl3J5YnCJaKtqtos4vZTMe23u6oeixWb/G9Bn6yqz6jqS8HDXsAhEeuuUtUFRRJldpxPEVuctwCeKFyheaJwSSE4c/hcRL4Nbifksk0TEZkenIXMFZGGwfJLI5Y/KyKl89ndFKBB8NxTgzkMvgtq/ZcPlj8o2XOAPBws6y8it4vIhVjNrVeDfVYMzgTSRORaERkUEXMvEflfIeP8ioiCbiLytIjMFJt
74v5g2U1YwpokIpOCZWeIyFfBcXxTRCrnsx9XwnmicImoYkSz09hg2WrgdFVtDnQFnsjledcAj6tqM+yLOj0o19AVODFYvgPons/+OwHfiUgFYATQVVWPwSoZXCsi+wF/B5qoalNgYOSTVXU0MBP75d9MVbdGrB4NnB/xuCvweiHj7ICV6chyt6qmAU2BtiLSVFWfwGr5tFfV9kEpj3uA04JjORO4LZ/9uBIuIUt4uBJva/BlGaks8GTQJr8Dq1uU01fA3SJSGxijqj+KyKnA8cCMoLxJRSzp5OZVEdkK/ISVoT4SWKaqPwTrXwSuB57E5roYLiLvATGXNFfVNSKyNKiz82Owj6nB6xYkzkpYuYrIGcq6iEhv7O/6YGyCnrk5ntsqWD412E857Lg5lydPFC5Z3Ar8BhyLnQnvMSmRqr4mIl8DZwMTReQqrKzyi6p6Zwz76B5ZQFBEcp3fJKgt1AIrMtcNuAE4pQDv5XWgC/A9MFZVVexbO+Y4sVncHgSGAOeLSH3gduBvqrpOREZghe9yEuAjVb24APG6Es6bnlyyqAqsCuYP6IH9mt6NiBwGLA2aW8ZhTTCfABeKyAHBNvtJ7HOKfw/UE5EGweMewGdBm35VVZ2AdRTnduXRRqzseW7GAOdhcyS8HiwrUJyquh1rQmoVNFvtC2wGNojIgcBZecQyDTgx6z2JyD4iktvZmXO7eKJwyeIpoKeITMOanTbnsk1XYJ6IzAaOwqZ8XIB9oX4oInOBj7BmmXypagZWXfNNEfkO2Ak8g33pvhu83mfY2U5OI4Bnsjqzc7zuOmABcKiqTg+WFTjOoO/jEeB2VZ2DzY89H3gea87KMhR4X0Qmqeoa7IqskcF+pmHHyrk8efVY55xzUfkZhXPOuag8UTjnnIvKE4VzzrmoPFE455yLyhOFc865qDxROOeci8oThXPOuaj+H1qUHB+BF7pTAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Plot tpr against fpr and show roc_auc (two decimals) in the legend.\n", "fig, ax = plt.subplots()\n", "ax.set_title('Receiver Operating Characteristic')\n", "ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)\n", "ax.legend(loc='lower right')\n", "# Dashed red diagonal from (0, 0) to (1, 1), drawn as a visual reference.\n", "ax.plot([0, 1], [0, 1], 'r--')\n", "# Clamp both axes to the unit interval.\n", "ax.set_xlim([0, 1])\n", "ax.set_ylim([0, 1])\n", "ax.set_ylabel('True Positive Rate')\n", "ax.set_xlabel('False Positive Rate')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }