{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. The Simple Regression Model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  J.M. Wooldridge (2016) Introductory Econometrics: A Modern Approach,\n",
      "  Cengage Learning, 6th edition.\n",
      "\n",
      "  401k       401ksubs    admnrev       affairs     airfare\n",
      "  alcohol    apple       approval      athlet1     athlet2\n",
      "  attend     audit       barium        beauty      benefits\n",
      "  beveridge  big9salary  bwght         bwght2      campus\n",
      "  card       catholic    cement        census2000  ceosal1\n",
      "  ceosal2    charity     consump       corn        countymurders\n",
      "  cps78_85   cps91       crime1        crime2      crime3\n",
      "  crime4     discrim     driving       earns       econmath\n",
      "  elem94_95  engin       expendshares  ezanders    ezunem\n",
      "  fair       fertil1     fertil2       fertil3     fish\n",
      "  fringe     gpa1        gpa2          gpa3        happiness\n",
      "  hprice1    hprice2     hprice3       hseinv      htv\n",
      "  infmrt     injury      intdef        intqrt      inven\n",
      "  jtrain     jtrain2     jtrain3       kielmc      lawsch85\n",
      "  loanapp    lowbrth     mathpnl       meap00_01   meap01\n",
      "  meap93     meapsingle  minwage       mlb1        mroz\n",
      "  murder     nbasal      nyse          okun        openness\n",
      "  pension    phillips    pntsprd       prison      prminwge\n",
      "  rdchem     rdtelec     recid         rental      return\n",
      "  saving     sleep75     slp75_81      smoke       traffic1\n",
      "  traffic2   twoyear     volat         vote1       vote2\n",
      "  voucher    wage1       wage2         wagepan     wageprc\n",
      "  wine\n"
     ]
    }
   ],
   "source": [
    "from wooldridge import *\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "import numpy as np\n",
    "\n",
    "dataWoo()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.3 CEO Salary & Return on Equity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: ceosal1\n",
      "no of variables: 12\n",
      "no of observations: 209\n",
      "\n",
      "+----------+-------------------------------+\n",
      "| variable | label                         |\n",
      "+----------+-------------------------------+\n",
      "| salary   | 1990 salary, thousands $      |\n",
      "| pcsalary | % change salary, 89-90        |\n",
      "| sales    | 1990 firm sales, millions $   |\n",
      "| roe      | return on equity, 88-90 avg   |\n",
      "| pcroe    | % change roe, 88-90           |\n",
      "| ros      | return on firm's stock, 88-90 |\n",
      "| indus    | =1 if industrial firm         |\n",
      "| finance  | =1 if financial firm          |\n",
      "| consprod | =1 if consumer product firm   |\n",
      "| utility  | =1 if transport. or utilties  |\n",
      "| lsalary  | natural log of salary         |\n",
      "| lsales   | natural log of sales          |\n",
      "+----------+-------------------------------+\n",
      "\n",
      "I took a random sample of data reported in the May 6, 1991 issue of\n",
      "Businessweek.\n"
     ]
    }
   ],
   "source": [
    "df = dataWoo('ceosal1')\n",
    "dataWoo('ceosal1', description=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary</th>\n",
       "      <th>pcsalary</th>\n",
       "      <th>sales</th>\n",
       "      <th>roe</th>\n",
       "      <th>pcroe</th>\n",
       "      <th>ros</th>\n",
       "      <th>indus</th>\n",
       "      <th>finance</th>\n",
       "      <th>consprod</th>\n",
       "      <th>utility</th>\n",
       "      <th>lsalary</th>\n",
       "      <th>lsales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1095</td>\n",
       "      <td>20</td>\n",
       "      <td>27595.000000</td>\n",
       "      <td>14.1</td>\n",
       "      <td>106.400002</td>\n",
       "      <td>191</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.998509</td>\n",
       "      <td>10.225389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1001</td>\n",
       "      <td>32</td>\n",
       "      <td>9958.000000</td>\n",
       "      <td>10.9</td>\n",
       "      <td>-30.600000</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.908755</td>\n",
       "      <td>9.206132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1122</td>\n",
       "      <td>9</td>\n",
       "      <td>6125.899902</td>\n",
       "      <td>23.5</td>\n",
       "      <td>-16.299999</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.022868</td>\n",
       "      <td>8.720281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>578</td>\n",
       "      <td>-9</td>\n",
       "      <td>16246.000000</td>\n",
       "      <td>5.9</td>\n",
       "      <td>-25.700001</td>\n",
       "      <td>-21</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.359574</td>\n",
       "      <td>9.695602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1368</td>\n",
       "      <td>7</td>\n",
       "      <td>21783.199219</td>\n",
       "      <td>13.8</td>\n",
       "      <td>-3.000000</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.221105</td>\n",
       "      <td>9.988894</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   salary  pcsalary         sales   roe       pcroe  ros  indus  finance  \\\n",
       "0    1095        20  27595.000000  14.1  106.400002  191      1        0   \n",
       "1    1001        32   9958.000000  10.9  -30.600000   13      1        0   \n",
       "2    1122         9   6125.899902  23.5  -16.299999   14      1        0   \n",
       "3     578        -9  16246.000000   5.9  -25.700001  -21      1        0   \n",
       "4    1368         7  21783.199219  13.8   -3.000000   56      1        0   \n",
       "\n",
       "   consprod  utility   lsalary     lsales  \n",
       "0         0        0  6.998509  10.225389  \n",
       "1         0        0  6.908755   9.206132  \n",
       "2         0        0  7.022868   8.720281  \n",
       "3         0        0  6.359574   9.695602  \n",
       "4         0        0  7.221105   9.988894  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>         <td>salary</td>      <th>  R-squared:         </th> <td>   0.013</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.008</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   2.767</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Tue, 09 Jul 2024</td> <th>  Prob (F-statistic):</th>  <td>0.0978</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>21:56:56</td>     <th>  Log-Likelihood:    </th> <td> -1804.5</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   209</td>      <th>  AIC:               </th> <td>   3613.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   207</td>      <th>  BIC:               </th> <td>   3620.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td>  963.1913</td> <td>  213.240</td> <td>    4.517</td> <td> 0.000</td> <td>  542.790</td> <td> 1383.592</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>roe</th>       <td>   18.5012</td> <td>   11.123</td> <td>    1.663</td> <td> 0.098</td> <td>   -3.428</td> <td>   40.431</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>311.096</td> <th>  Durbin-Watson:     </th> <td>   2.105</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td>  <th>  Jarque-Bera (JB):  </th> <td>31120.902</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 6.915</td>  <th>  Prob(JB):          </th> <td>    0.00</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td>61.158</td>  <th>  Cond. No.          </th> <td>    43.3</td> \n",
       "</tr>\n",
       "</table><br/><br/>Notes:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/latex": [
       "\\begin{center}\n",
       "\\begin{tabular}{lclc}\n",
       "\\toprule\n",
       "\\textbf{Dep. Variable:}    &      salary      & \\textbf{  R-squared:         } &     0.013   \\\\\n",
       "\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.008   \\\\\n",
       "\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     2.767   \\\\\n",
       "\\textbf{Date:}             & Tue, 09 Jul 2024 & \\textbf{  Prob (F-statistic):} &   0.0978    \\\\\n",
       "\\textbf{Time:}             &     21:56:56     & \\textbf{  Log-Likelihood:    } &   -1804.5   \\\\\n",
       "\\textbf{No. Observations:} &         209      & \\textbf{  AIC:               } &     3613.   \\\\\n",
       "\\textbf{Df Residuals:}     &         207      & \\textbf{  BIC:               } &     3620.   \\\\\n",
       "\\textbf{Df Model:}         &           1      & \\textbf{                     } &             \\\\\n",
       "\\textbf{Covariance Type:}  &    nonrobust     & \\textbf{                     } &             \\\\\n",
       "\\bottomrule\n",
       "\\end{tabular}\n",
       "\\begin{tabular}{lcccccc}\n",
       "                   & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]}  \\\\\n",
       "\\midrule\n",
       "\\textbf{Intercept} &     963.1913  &      213.240     &     4.517  &         0.000        &      542.790    &     1383.592     \\\\\n",
       "\\textbf{roe}       &      18.5012  &       11.123     &     1.663  &         0.098        &       -3.428    &       40.431     \\\\\n",
       "\\bottomrule\n",
       "\\end{tabular}\n",
       "\\begin{tabular}{lclc}\n",
       "\\textbf{Omnibus:}       & 311.096 & \\textbf{  Durbin-Watson:     } &     2.105  \\\\\n",
       "\\textbf{Prob(Omnibus):} &   0.000 & \\textbf{  Jarque-Bera (JB):  } & 31120.902  \\\\\n",
       "\\textbf{Skew:}          &   6.915 & \\textbf{  Prob(JB):          } &      0.00  \\\\\n",
       "\\textbf{Kurtosis:}      &  61.158 & \\textbf{  Cond. No.          } &      43.3  \\\\\n",
       "\\bottomrule\n",
       "\\end{tabular}\n",
       "%\\caption{OLS Regression Results}\n",
       "\\end{center}\n",
       "\n",
       "Notes: \\newline\n",
       " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                 salary   R-squared:                       0.013\n",
       "Model:                            OLS   Adj. R-squared:                  0.008\n",
       "Method:                 Least Squares   F-statistic:                     2.767\n",
       "Date:                Tue, 09 Jul 2024   Prob (F-statistic):             0.0978\n",
       "Time:                        21:56:56   Log-Likelihood:                -1804.5\n",
       "No. Observations:                 209   AIC:                             3613.\n",
       "Df Residuals:                     207   BIC:                             3620.\n",
       "Df Model:                           1                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept    963.1913    213.240      4.517      0.000     542.790    1383.592\n",
       "roe           18.5012     11.123      1.663      0.098      -3.428      40.431\n",
       "==============================================================================\n",
       "Omnibus:                      311.096   Durbin-Watson:                   2.105\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):            31120.902\n",
       "Skew:                           6.915   Prob(JB):                         0.00\n",
       "Kurtosis:                      61.158   Cond. No.                         43.3\n",
       "==============================================================================\n",
       "\n",
       "Notes:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = smf.ols(formula='salary ~ 1 + roe', data=df).fit()\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                 salary   R-squared:                       0.013\n",
      "Model:                            OLS   Adj. R-squared:                  0.008\n",
      "Method:                 Least Squares   F-statistic:                     2.767\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):             0.0978\n",
      "Time:                        21:56:56   Log-Likelihood:                -1804.5\n",
      "No. Observations:                 209   AIC:                             3613.\n",
      "Df Residuals:                     207   BIC:                             3620.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept    963.1913    213.240      4.517      0.000     542.790    1383.592\n",
      "roe           18.5012     11.123      1.663      0.098      -3.428      40.431\n",
      "==============================================================================\n",
      "Omnibus:                      311.096   Durbin-Watson:                   2.105\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):            31120.902\n",
      "Skew:                           6.915   Prob(JB):                         0.00\n",
      "Kurtosis:                      61.158   Cond. No.                         43.3\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Intercept    963.191336\n",
       "roe           18.501186\n",
       "dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "209.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.nobs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "if the return on equity increases\n",
    "by one percentage point,  then salary is predicted to change by about 18.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1518.226927\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted_salary = model.predict(pd.DataFrame({'roe': [30]}))\n",
    "predicted_salary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.4 Wage Equation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: wage1\n",
      "no of variables: 24\n",
      "no of observations: 526\n",
      "\n",
      "+----------+---------------------------------+\n",
      "| variable | label                           |\n",
      "+----------+---------------------------------+\n",
      "| wage     | average hourly earnings         |\n",
      "| educ     | years of education              |\n",
      "| exper    | years potential experience      |\n",
      "| tenure   | years with current employer     |\n",
      "| nonwhite | =1 if nonwhite                  |\n",
      "| female   | =1 if female                    |\n",
      "| married  | =1 if married                   |\n",
      "| numdep   | number of dependents            |\n",
      "| smsa     | =1 if live in SMSA              |\n",
      "| northcen | =1 if live in north central U.S |\n",
      "| south    | =1 if live in southern region   |\n",
      "| west     | =1 if live in western region    |\n",
      "| construc | =1 if work in construc. indus.  |\n",
      "| ndurman  | =1 if in nondur. manuf. indus.  |\n",
      "| trcommpu | =1 if in trans, commun, pub ut  |\n",
      "| trade    | =1 if in wholesale or retail    |\n",
      "| services | =1 if in services indus.        |\n",
      "| profserv | =1 if in prof. serv. indus.     |\n",
      "| profocc  | =1 if in profess. occupation    |\n",
      "| clerocc  | =1 if in clerical occupation    |\n",
      "| servocc  | =1 if in service occupation     |\n",
      "| lwage    | log(wage)                       |\n",
      "| expersq  | exper^2                         |\n",
      "| tenursq  | tenure^2                        |\n",
      "+----------+---------------------------------+\n",
      "\n",
      "These are data from the 1976 Current Population Survey, collected by\n",
      "Henry Farber when he and I were colleagues at MIT in 1988.\n"
     ]
    }
   ],
   "source": [
    "df2 = dataWoo('wage1')\n",
    "dataWoo('wage1', description=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>wage</th>\n",
       "      <th>educ</th>\n",
       "      <th>exper</th>\n",
       "      <th>tenure</th>\n",
       "      <th>nonwhite</th>\n",
       "      <th>female</th>\n",
       "      <th>married</th>\n",
       "      <th>numdep</th>\n",
       "      <th>smsa</th>\n",
       "      <th>northcen</th>\n",
       "      <th>...</th>\n",
       "      <th>trcommpu</th>\n",
       "      <th>trade</th>\n",
       "      <th>services</th>\n",
       "      <th>profserv</th>\n",
       "      <th>profocc</th>\n",
       "      <th>clerocc</th>\n",
       "      <th>servocc</th>\n",
       "      <th>lwage</th>\n",
       "      <th>expersq</th>\n",
       "      <th>tenursq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.10</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.131402</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.24</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.175573</td>\n",
       "      <td>484</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.00</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.098612</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6.00</td>\n",
       "      <td>8</td>\n",
       "      <td>44</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.791759</td>\n",
       "      <td>1936</td>\n",
       "      <td>784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.30</td>\n",
       "      <td>12</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.667707</td>\n",
       "      <td>49</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   wage  educ  exper  tenure  nonwhite  female  married  numdep  smsa  \\\n",
       "0  3.10    11      2       0         0       1        0       2     1   \n",
       "1  3.24    12     22       2         0       1        1       3     1   \n",
       "2  3.00    11      2       0         0       0        0       2     0   \n",
       "3  6.00     8     44      28         0       0        1       0     1   \n",
       "4  5.30    12      7       2         0       0        1       1     0   \n",
       "\n",
       "   northcen  ...  trcommpu  trade  services  profserv  profocc  clerocc  \\\n",
       "0         0  ...         0      0         0         0        0        0   \n",
       "1         0  ...         0      0         1         0        0        0   \n",
       "2         0  ...         0      1         0         0        0        0   \n",
       "3         0  ...         0      0         0         0        0        1   \n",
       "4         0  ...         0      0         0         0        0        0   \n",
       "\n",
       "   servocc     lwage  expersq  tenursq  \n",
       "0        0  1.131402        4        0  \n",
       "1        1  1.175573      484        4  \n",
       "2        0  1.098612        4        0  \n",
       "3        0  1.791759     1936      784  \n",
       "4        0  1.667707       49        4  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                   wage   R-squared:                       0.165\n",
      "Model:                            OLS   Adj. R-squared:                  0.163\n",
      "Method:                 Least Squares   F-statistic:                     103.4\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):           2.78e-22\n",
      "Time:                        21:56:56   Log-Likelihood:                -1385.7\n",
      "No. Observations:                 526   AIC:                             2775.\n",
      "Df Residuals:                     524   BIC:                             2784.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept     -0.9049      0.685     -1.321      0.187      -2.250       0.441\n",
      "educ           0.5414      0.053     10.167      0.000       0.437       0.646\n",
      "==============================================================================\n",
      "Omnibus:                      212.554   Durbin-Watson:                   1.824\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):              807.843\n",
      "Skew:                           1.861   Prob(JB):                    3.79e-176\n",
      "Kurtosis:                       7.797   Cond. No.                         60.2\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "model2 = smf.ols(formula='wage ~ educ', data=df2).fit()\n",
    "print(model2.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The intercept of −0.90 literally means that a person\n",
    "with no education has a predicted hourly wage of −90¢ an hour"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "one more year of education increases hourly wage by\n",
    "54¢ an hour"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.5 Vote share"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: vote1\n",
      "no of variables: 10\n",
      "no of observations: 173\n",
      "\n",
      "+----------+---------------------------------+\n",
      "| variable | label                           |\n",
      "+----------+---------------------------------+\n",
      "| state    | state postal code               |\n",
      "| district | congressional district          |\n",
      "| democA   | =1 if A is democrat             |\n",
      "| voteA    | percent vote for A              |\n",
      "| expendA  | camp. expends. by A, $1000s     |\n",
      "| expendB  | camp. expends. by B, $1000s     |\n",
      "| prtystrA | % vote for president            |\n",
      "| lexpendA | log(expendA)                    |\n",
      "| lexpendB | log(expendB)                    |\n",
      "| shareA   | 100*(expendA/(expendA+expendB)) |\n",
      "+----------+---------------------------------+\n",
      "\n",
      "From M. Barone and G. Ujifusa, The Almanac of American Politics, 1992.\n",
      "Washington, DC: National Journal.\n"
     ]
    }
   ],
   "source": [
    "df3 = dataWoo('vote1')\n",
    "dataWoo('vote1', description=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>district</th>\n",
       "      <th>democA</th>\n",
       "      <th>voteA</th>\n",
       "      <th>expendA</th>\n",
       "      <th>expendB</th>\n",
       "      <th>prtystrA</th>\n",
       "      <th>lexpendA</th>\n",
       "      <th>lexpendB</th>\n",
       "      <th>shareA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AL</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>68</td>\n",
       "      <td>328.295990</td>\n",
       "      <td>8.737000</td>\n",
       "      <td>41</td>\n",
       "      <td>5.793916</td>\n",
       "      <td>2.167567</td>\n",
       "      <td>97.407669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>62</td>\n",
       "      <td>626.377014</td>\n",
       "      <td>402.476990</td>\n",
       "      <td>60</td>\n",
       "      <td>6.439952</td>\n",
       "      <td>5.997638</td>\n",
       "      <td>60.881039</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AZ</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>73</td>\n",
       "      <td>99.607002</td>\n",
       "      <td>3.065000</td>\n",
       "      <td>55</td>\n",
       "      <td>4.601233</td>\n",
       "      <td>1.120048</td>\n",
       "      <td>97.014763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AZ</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>319.690002</td>\n",
       "      <td>26.281000</td>\n",
       "      <td>64</td>\n",
       "      <td>5.767352</td>\n",
       "      <td>3.268846</td>\n",
       "      <td>92.403702</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AR</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>75</td>\n",
       "      <td>159.220993</td>\n",
       "      <td>60.054001</td>\n",
       "      <td>66</td>\n",
       "      <td>5.070293</td>\n",
       "      <td>4.095244</td>\n",
       "      <td>72.612473</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state  district  democA  voteA     expendA     expendB  prtystrA  lexpendA  \\\n",
       "0    AL         7       1     68  328.295990    8.737000        41  5.793916   \n",
       "1    AK         1       0     62  626.377014  402.476990        60  6.439952   \n",
       "2    AZ         2       1     73   99.607002    3.065000        55  4.601233   \n",
       "3    AZ         3       0     69  319.690002   26.281000        64  5.767352   \n",
       "4    AR         3       0     75  159.220993   60.054001        66  5.070293   \n",
       "\n",
       "   lexpendB     shareA  \n",
       "0  2.167567  97.407669  \n",
       "1  5.997638  60.881039  \n",
       "2  1.120048  97.014763  \n",
       "3  3.268846  92.403702  \n",
       "4  4.095244  72.612473  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  voteA   R-squared:                       0.856\n",
      "Model:                            OLS   Adj. R-squared:                  0.855\n",
      "Method:                 Least Squares   F-statistic:                     1018.\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):           6.63e-74\n",
      "Time:                        21:56:56   Log-Likelihood:                -565.20\n",
      "No. Observations:                 173   AIC:                             1134.\n",
      "Df Residuals:                     171   BIC:                             1141.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept     26.8122      0.887     30.221      0.000      25.061      28.564\n",
      "shareA         0.4638      0.015     31.901      0.000       0.435       0.493\n",
      "==============================================================================\n",
      "Omnibus:                       20.747   Durbin-Watson:                   1.826\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               44.613\n",
      "Skew:                           0.525   Prob(JB):                     2.05e-10\n",
      "Kurtosis:                       5.255   Cond. No.                         112.\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "model3 = smf.ols(formula='voteA ~ shareA', data=df3).fit()\n",
    "print(model3.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "if Candidate A’s share of spending increases by one percentage point, Candidate A\n",
    "receives almost one-half a percentage point (0.464) more of the total vote."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.6 Table 2.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['salary_hat'] = model.fittedvalues\n",
    "df['uhat'] = model.resid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>roe</th>\n",
       "      <th>salary</th>\n",
       "      <th>salary_hat</th>\n",
       "      <th>uhat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>14.100000</td>\n",
       "      <td>1095</td>\n",
       "      <td>1224.058071</td>\n",
       "      <td>-129.058071</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.900000</td>\n",
       "      <td>1001</td>\n",
       "      <td>1164.854261</td>\n",
       "      <td>-163.854261</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23.500000</td>\n",
       "      <td>1122</td>\n",
       "      <td>1397.969216</td>\n",
       "      <td>-275.969216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5.900000</td>\n",
       "      <td>578</td>\n",
       "      <td>1072.348338</td>\n",
       "      <td>-494.348338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.800000</td>\n",
       "      <td>1368</td>\n",
       "      <td>1218.507712</td>\n",
       "      <td>149.492288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>20.000000</td>\n",
       "      <td>1145</td>\n",
       "      <td>1333.215063</td>\n",
       "      <td>-188.215063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>16.400000</td>\n",
       "      <td>1078</td>\n",
       "      <td>1266.610785</td>\n",
       "      <td>-188.610785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>16.299999</td>\n",
       "      <td>1094</td>\n",
       "      <td>1264.760660</td>\n",
       "      <td>-170.760660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10.500000</td>\n",
       "      <td>1237</td>\n",
       "      <td>1157.453793</td>\n",
       "      <td>79.546207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>26.299999</td>\n",
       "      <td>833</td>\n",
       "      <td>1449.772523</td>\n",
       "      <td>-616.772523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>25.900000</td>\n",
       "      <td>567</td>\n",
       "      <td>1442.372056</td>\n",
       "      <td>-875.372056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>26.799999</td>\n",
       "      <td>933</td>\n",
       "      <td>1459.023116</td>\n",
       "      <td>-526.023116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>14.800000</td>\n",
       "      <td>1339</td>\n",
       "      <td>1237.008898</td>\n",
       "      <td>101.991102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>22.299999</td>\n",
       "      <td>937</td>\n",
       "      <td>1375.767778</td>\n",
       "      <td>-438.767778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>56.299999</td>\n",
       "      <td>2011</td>\n",
       "      <td>2004.808114</td>\n",
       "      <td>6.191886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>12.600000</td>\n",
       "      <td>1585</td>\n",
       "      <td>1196.306291</td>\n",
       "      <td>388.693709</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          roe  salary   salary_hat        uhat\n",
       "0   14.100000    1095  1224.058071 -129.058071\n",
       "1   10.900000    1001  1164.854261 -163.854261\n",
       "2   23.500000    1122  1397.969216 -275.969216\n",
       "3    5.900000     578  1072.348338 -494.348338\n",
       "4   13.800000    1368  1218.507712  149.492288\n",
       "5   20.000000    1145  1333.215063 -188.215063\n",
       "6   16.400000    1078  1266.610785 -188.610785\n",
       "7   16.299999    1094  1264.760660 -170.760660\n",
       "8   10.500000    1237  1157.453793   79.546207\n",
       "9   26.299999     833  1449.772523 -616.772523\n",
       "10  25.900000     567  1442.372056 -875.372056\n",
       "11  26.799999     933  1459.023116 -526.023116\n",
       "12  14.800000    1339  1237.008898  101.991102\n",
       "13  22.299999     937  1375.767778 -438.767778\n",
       "14  56.299999    2011  2004.808114    6.191886\n",
       "15  12.600000    1585  1196.306291  388.693709"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['roe','salary','salary_hat','uhat']].head(16)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The first four CEOs have lower salaries than what we predicted from the OLS regression\n",
    "line (2.26); in other words, given only the firm’s roe, these CEOs make less than what we\n",
    "predicted. As can be seen from the positive uhat, the fifth CEO makes more than predicted from\n",
    "the OLS regression line."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.7 Wage & education"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(5.896102674787035)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2['wage'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(12.562737642585551)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2['educ'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                   wage   R-squared:                       0.165\n",
      "Model:                            OLS   Adj. R-squared:                  0.163\n",
      "Method:                 Least Squares   F-statistic:                     103.4\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):           2.78e-22\n",
      "Time:                        21:56:56   Log-Likelihood:                -1385.7\n",
      "No. Observations:                 526   AIC:                             2775.\n",
      "Df Residuals:                     524   BIC:                             2784.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept     -0.9049      0.685     -1.321      0.187      -2.250       0.441\n",
      "educ           0.5414      0.053     10.167      0.000       0.437       0.646\n",
      "==============================================================================\n",
      "Omnibus:                      212.554   Durbin-Watson:                   1.824\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):              807.843\n",
      "Skew:                           1.861   Prob(JB):                    3.79e-176\n",
      "Kurtosis:                       7.797   Cond. No.                         60.2\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "print(model2.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    5.894621\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model2.predict(pd.DataFrame({'educ': [12.56]}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$\\bar{x}$ and $\\bar{y}$  fall on the regression line"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2.8. CEO Salary - R-squared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                 salary   R-squared:                       0.013\n",
      "Model:                            OLS   Adj. R-squared:                  0.008\n",
      "Method:                 Least Squares   F-statistic:                     2.767\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):             0.0978\n",
      "Time:                        21:56:56   Log-Likelihood:                -1804.5\n",
      "No. Observations:                 209   AIC:                             3613.\n",
      "Df Residuals:                     207   BIC:                             3620.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept    963.1913    213.240      4.517      0.000     542.790    1383.592\n",
      "roe           18.5012     11.123      1.663      0.098      -3.428      40.431\n",
      "==============================================================================\n",
      "Omnibus:                      311.096   Durbin-Watson:                   2.105\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):            31120.902\n",
      "Skew:                           6.915   Prob(JB):                         0.00\n",
      "Kurtosis:                      61.158   Cond. No.                         43.3\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(0.01318862408103405)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.rsquared"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The firm’s return on equity explains only about 1.3% of the variation in salaries for this sample of 209 CEOs. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example2.9 Voting outcome - R-squared."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  voteA   R-squared:                       0.856\n",
      "Model:                            OLS   Adj. R-squared:                  0.855\n",
      "Method:                 Least Squares   F-statistic:                     1018.\n",
      "Date:                Tue, 09 Jul 2024   Prob (F-statistic):           6.63e-74\n",
      "Time:                        21:56:56   Log-Likelihood:                -565.20\n",
      "No. Observations:                 173   AIC:                             1134.\n",
      "Df Residuals:                     171   BIC:                             1141.\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept     26.8122      0.887     30.221      0.000      25.061      28.564\n",
      "shareA         0.4638      0.015     31.901      0.000       0.435       0.493\n",
      "==============================================================================\n",
      "Omnibus:                       20.747   Durbin-Watson:                   1.826\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               44.613\n",
      "Skew:                           0.525   Prob(JB):                     2.05e-10\n",
      "Kurtosis:                       5.255   Cond. No.                         112.\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "print(model3.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(0.8561408655827665)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model3.rsquared"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The share of campaign expenditures explains over 85% of the variation in the election outcomes for this sample"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### C1\n",
    "The data in 401K are a subset of data analyzed by Papke (1995) to study the relationship between\n",
    "participation in a 401(k) pension plan and the generosity of the plan. The variable prate is the percentage of eligible workers with an active account; this is the variable we would like to explain. The\n",
    "measure of generosity is the plan match rate, mrate. This variable gives the average amount the firm\n",
    "contributes to each worker’s plan for each $1 contribution by the worker. For example , if mrate =   0.50, then a $1 contribution by the worker is matched by a 50¢ contribution by the firm."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prate</th>\n",
       "      <th>mrate</th>\n",
       "      <th>totpart</th>\n",
       "      <th>totelg</th>\n",
       "      <th>age</th>\n",
       "      <th>totemp</th>\n",
       "      <th>sole</th>\n",
       "      <th>ltotemp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>26.100000</td>\n",
       "      <td>0.21</td>\n",
       "      <td>1653.0</td>\n",
       "      <td>6322.0</td>\n",
       "      <td>8</td>\n",
       "      <td>8709.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.072112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>1.42</td>\n",
       "      <td>262.0</td>\n",
       "      <td>262.0</td>\n",
       "      <td>6</td>\n",
       "      <td>315.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5.752573</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>97.599998</td>\n",
       "      <td>0.91</td>\n",
       "      <td>166.0</td>\n",
       "      <td>170.0</td>\n",
       "      <td>10</td>\n",
       "      <td>275.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5.616771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.42</td>\n",
       "      <td>257.0</td>\n",
       "      <td>257.0</td>\n",
       "      <td>7</td>\n",
       "      <td>500.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.214608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>82.500000</td>\n",
       "      <td>0.53</td>\n",
       "      <td>591.0</td>\n",
       "      <td>716.0</td>\n",
       "      <td>28</td>\n",
       "      <td>933.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6.838405</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        prate  mrate  totpart  totelg  age  totemp  sole   ltotemp\n",
       "0   26.100000   0.21   1653.0  6322.0    8  8709.0     0  9.072112\n",
       "1  100.000000   1.42    262.0   262.0    6   315.0     1  5.752573\n",
       "2   97.599998   0.91    166.0   170.0   10   275.0     1  5.616771\n",
       "3  100.000000   0.42    257.0   257.0    7   500.0     0  6.214608\n",
       "4   82.500000   0.53    591.0   716.0   28   933.0     1  6.838405"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = dataWoo('401K')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: 401k\n",
      "no of variables: 8\n",
      "no of observations: 1534\n",
      "\n",
      "+----------+---------------------------------+\n",
      "| variable | label                           |\n",
      "+----------+---------------------------------+\n",
      "| prate    | participation rate, percent     |\n",
      "| mrate    | 401k plan match rate            |\n",
      "| totpart  | total 401k participants         |\n",
      "| totelg   | total eligible for 401k plan    |\n",
      "| age      | age of 401k plan                |\n",
      "| totemp   | total number of firm employees  |\n",
      "| sole     | = 1 if 401k is firm's sole plan |\n",
      "| ltotemp  | log of totemp                   |\n",
      "+----------+---------------------------------+\n",
      "\n",
      "L.E. Papke (1995), “Participation in and Contributions to 401(k)\n",
      "Pension Plans:Evidence from Plan Data,” Journal of Human Resources 30,\n",
      "311-325. Professor Papke kindly provided these data. She gathered them\n",
      "from the Internal Revenue Service’s Form 5500 tapes.\n"
     ]
    }
   ],
   "source": [
    "dataWoo('401K', description=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(i) Find the average participation rate and the average match rate in the sample of plans."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average participation rate: 87.36\n",
      "Average match rate: 0.73\n"
     ]
    }
   ],
   "source": [
    "print(\"Average participation rate:\", round(df['prate'].mean(), 2))\n",
    "print(\"Average match rate:\", round(df['mrate'].mean(), 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(ii) Now, estimate the simple regression equation\n",
    "$$\n",
    "\\hat{prate} = \\hat{b}_0 + \\hat{b}_1 mrate\n",
    "$$\n",
    "\n",
    "and report the results along with the sample size and R-squared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "results: Intercept    83.075455\n",
      "mrate         5.861079\n",
      "dtype: float64\n",
      "R squared: 0.075\n",
      "Sample size: 1534.0\n"
     ]
    }
   ],
   "source": [
    "prate_hat = smf.ols(\"prate ~ 1 + mrate\", data=df).fit()\n",
    "\n",
    "print(\"results:\", prate_hat.params)\n",
    "\n",
    "print(\"R squared:\", prate_hat.rsquared.__round__(3))\n",
    "\n",
    "print(\"Sample size:\", prate_hat.nobs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(iii) Interpret the intercept in your equation. Interpret the coefficient on mrate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "intercept: 83.08\n"
     ]
    }
   ],
   "source": [
    "print('intercept:', prate_hat.params.iloc[0].__round__(2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find the predicted prate when mrate = 3.5. Is this a reasonable prediction? Explain what is\n",
    "happening here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    103.59\n",
       "dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(prate_hat.predict({'mrate': 3.5}), 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(v) How much of the variation in prate is explained by mrate? Is this a lot in your opinion?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Percentage explained: 7.5\n"
     ]
    }
   ],
   "source": [
    "print(\"Percentage explained:\", round(prate_hat.rsquared * 100, 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### C2\n",
    "The data set in CEOSAL2 contains information on chief executive officers for U.S. corporations. The\n",
    "variable salary is annual compensation, in thousands of dollars, and ceoten is prior number of years as\n",
    "company CEO."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary</th>\n",
       "      <th>age</th>\n",
       "      <th>college</th>\n",
       "      <th>grad</th>\n",
       "      <th>comten</th>\n",
       "      <th>ceoten</th>\n",
       "      <th>sales</th>\n",
       "      <th>profits</th>\n",
       "      <th>mktval</th>\n",
       "      <th>lsalary</th>\n",
       "      <th>lsales</th>\n",
       "      <th>lmktval</th>\n",
       "      <th>comtensq</th>\n",
       "      <th>ceotensq</th>\n",
       "      <th>profmarg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1161</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>966</td>\n",
       "      <td>23200.0</td>\n",
       "      <td>7.057037</td>\n",
       "      <td>8.732305</td>\n",
       "      <td>10.051908</td>\n",
       "      <td>81</td>\n",
       "      <td>4</td>\n",
       "      <td>15.580646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>600</td>\n",
       "      <td>43</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>283.0</td>\n",
       "      <td>48</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>6.396930</td>\n",
       "      <td>5.645447</td>\n",
       "      <td>7.003066</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>16.961130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>379</td>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>169.0</td>\n",
       "      <td>40</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>5.937536</td>\n",
       "      <td>5.129899</td>\n",
       "      <td>7.003066</td>\n",
       "      <td>81</td>\n",
       "      <td>9</td>\n",
       "      <td>23.668638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>651</td>\n",
       "      <td>55</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "      <td>22</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>-54</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>6.478509</td>\n",
       "      <td>7.003066</td>\n",
       "      <td>6.907755</td>\n",
       "      <td>484</td>\n",
       "      <td>484</td>\n",
       "      <td>-4.909091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>497</td>\n",
       "      <td>44</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>351.0</td>\n",
       "      <td>28</td>\n",
       "      <td>387.0</td>\n",
       "      <td>6.208590</td>\n",
       "      <td>5.860786</td>\n",
       "      <td>5.958425</td>\n",
       "      <td>64</td>\n",
       "      <td>36</td>\n",
       "      <td>7.977208</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   salary  age  college  grad  comten  ceoten   sales  profits   mktval  \\\n",
       "0    1161   49        1     1       9       2  6200.0      966  23200.0   \n",
       "1     600   43        1     1      10      10   283.0       48   1100.0   \n",
       "2     379   51        1     1       9       3   169.0       40   1100.0   \n",
       "3     651   55        1     0      22      22  1100.0      -54   1000.0   \n",
       "4     497   44        1     1       8       6   351.0       28    387.0   \n",
       "\n",
       "    lsalary    lsales    lmktval  comtensq  ceotensq   profmarg  \n",
       "0  7.057037  8.732305  10.051908        81         4  15.580646  \n",
       "1  6.396930  5.645447   7.003066       100       100  16.961130  \n",
       "2  5.937536  5.129899   7.003066        81         9  23.668638  \n",
       "3  6.478509  7.003066   6.907755       484       484  -4.909091  \n",
       "4  6.208590  5.860786   5.958425        64        36   7.977208  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = dataWoo(\"CEOSAL2\")\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: ceosal2\n",
      "no of variables: 15\n",
      "no of observations: 177\n",
      "\n",
      "+----------+--------------------------------+\n",
      "| variable | label                          |\n",
      "+----------+--------------------------------+\n",
      "| salary   | 1990 compensation, $1000s      |\n",
      "| age      | in years                       |\n",
      "| college  | =1 if attended college         |\n",
      "| grad     | =1 if attended graduate school |\n",
      "| comten   | years with company             |\n",
      "| ceoten   | years as ceo with company      |\n",
      "| sales    | 1990 firm sales, millions      |\n",
      "| profits  | 1990 profits, millions         |\n",
      "| mktval   | market value, end 1990, mills. |\n",
      "| lsalary  | log(salary)                    |\n",
      "| lsales   | log(sales)                     |\n",
      "| lmktval  | log(mktval)                    |\n",
      "| comtensq | comten^2                       |\n",
      "| ceotensq | ceoten^2                       |\n",
      "| profmarg | profits as % of sales          |\n",
      "+----------+--------------------------------+\n",
      "\n",
      "See CEOSAL1.RAW\n"
     ]
    }
   ],
   "source": [
    "dataWoo(\"CEOSAL2\", description=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(i) Find the average salary and the average tenure in the sample."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average Salary: 865.864\n",
      "Average ceoten 7.95\n"
     ]
    }
   ],
   "source": [
    "print(\"Average Salary:\", round(df2['salary'].mean(), 3))\n",
    "print(\"Average ceoten\", round(df2[\"ceoten\"].mean(), 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)? What is the longest tenure\n",
    "as a CEO?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of first year CEO: 5\n",
      "Longest Tenure: 37\n"
     ]
    }
   ],
   "source": [
    "print(\"Number of first year CEO:\", (df2['ceoten'] == 0).sum())\n",
    "print(\"Longest Tenure:\", df2[\"ceoten\"].max())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(iii) Estimate the simple regression model \n",
    "$$\n",
    "\\log(salary) =  {B}_0 + {B}_1 {ceoten} + u,\n",
    "$$\n",
    "and report your results in the usual form. What is the (approximate) predicted percentage\n",
    "increase in salary given one more year as a CEO?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Paramters:\n",
      "Intercept    6.505498\n",
      "ceoten       0.009724\n",
      "dtype: float64\n",
      "Percentage increase: 0.97\n"
     ]
    }
   ],
   "source": [
    "log_salary_hat = smf.ols(\"np.log(salary) ~ 1 + ceoten\", data=df2).fit()\n",
    "\n",
    "print(\"Paramters:\\n\", log_salary_hat.params, sep='')\n",
    "\n",
    "print(\"Percentage increase:\", round(log_salary_hat.params.iloc[1] * 100, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### C3 \n",
    "Use the data in SLEEP75 from Biddle and Hamermesh (1990) to study whether there is a tradeoff\n",
    "between the time spent sleeping per week and the time spent in paid work. We could use either variable\n",
    "as the dependent variable. For concreteness, \n",
    "estimate the model \n",
    "$$\n",
    "sleep = B_0 + B_1 totwrk + u\n",
    "$$ \n",
    "where sleep is minutes spent sleeping at night per week and totwrk is total minutes worked during the week"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>black</th>\n",
       "      <th>case</th>\n",
       "      <th>clerical</th>\n",
       "      <th>construc</th>\n",
       "      <th>educ</th>\n",
       "      <th>earns74</th>\n",
       "      <th>gdhlth</th>\n",
       "      <th>inlf</th>\n",
       "      <th>leis1</th>\n",
       "      <th>...</th>\n",
       "      <th>spwrk75</th>\n",
       "      <th>totwrk</th>\n",
       "      <th>union</th>\n",
       "      <th>worknrm</th>\n",
       "      <th>workscnd</th>\n",
       "      <th>exper</th>\n",
       "      <th>yngkid</th>\n",
       "      <th>yrsmarr</th>\n",
       "      <th>hrwage</th>\n",
       "      <th>agesq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>32</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3529</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3438</td>\n",
       "      <td>0</td>\n",
       "      <td>3438</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>7.070004</td>\n",
       "      <td>1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14</td>\n",
       "      <td>9500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2140</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>5020</td>\n",
       "      <td>0</td>\n",
       "      <td>5020</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.429999</td>\n",
       "      <td>961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17</td>\n",
       "      <td>42500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4595</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>2815</td>\n",
       "      <td>0</td>\n",
       "      <td>2815</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>20.529997</td>\n",
       "      <td>1936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12</td>\n",
       "      <td>42500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3211</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>3786</td>\n",
       "      <td>0</td>\n",
       "      <td>3786</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>9.619998</td>\n",
       "      <td>900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14</td>\n",
       "      <td>2500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4052</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>2580</td>\n",
       "      <td>0</td>\n",
       "      <td>2580</td>\n",
       "      <td>0</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>33</td>\n",
       "      <td>2.750000</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 34 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  black  case  clerical  construc  educ  earns74  gdhlth  inlf  leis1  \\\n",
       "0   32      0     1       0.0       0.0    12      0.0       0     1   3529   \n",
       "1   31      0     2       0.0       0.0    14   9500.0       1     1   2140   \n",
       "2   44      0     3       0.0       0.0    17  42500.0       1     1   4595   \n",
       "3   30      0     4       0.0       0.0    12  42500.0       1     1   3211   \n",
       "4   64      0     5       0.0       0.0    14   2500.0       1     1   4052   \n",
       "\n",
       "   ...  spwrk75  totwrk  union  worknrm  workscnd  exper  yngkid  yrsmarr  \\\n",
       "0  ...        0    3438      0     3438         0     14       0       13   \n",
       "1  ...        0    5020      0     5020         0     11       0        0   \n",
       "2  ...        1    2815      0     2815         0     21       0        0   \n",
       "3  ...        1    3786      0     3786         0     12       0       12   \n",
       "4  ...        1    2580      0     2580         0     44       0       33   \n",
       "\n",
       "      hrwage  agesq  \n",
       "0   7.070004   1024  \n",
       "1   1.429999    961  \n",
       "2  20.529997   1936  \n",
       "3   9.619998    900  \n",
       "4   2.750000   4096  \n",
       "\n",
       "[5 rows x 34 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = dataWoo(\"sleep75\")\n",
    "df3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name of dataset: sleep75\n",
      "no of variables: 34\n",
      "no of observations: 706\n",
      "\n",
      "+----------+--------------------------------+\n",
      "| variable | label                          |\n",
      "+----------+--------------------------------+\n",
      "| age      | in years                       |\n",
      "| black    | =1 if black                    |\n",
      "| case     | identifier                     |\n",
      "| clerical | =1 if clerical worker          |\n",
      "| construc | =1 if construction worker      |\n",
      "| educ     | years of schooling             |\n",
      "| earns74  | total earnings, 1974           |\n",
      "| gdhlth   | =1 if in good or excel. health |\n",
      "| inlf     | =1 if in labor force           |\n",
      "| leis1    | sleep - totwrk                 |\n",
      "| leis2    | slpnaps - totwrk               |\n",
      "| leis3    | rlxall - totwrk                |\n",
      "| smsa     | =1 if live in smsa             |\n",
      "| lhrwage  | log hourly wage                |\n",
      "| lothinc  | log othinc, unless othinc < 0  |\n",
      "| male     | =1 if male                     |\n",
      "| marr     | =1 if married                  |\n",
      "| prot     | =1 if Protestant               |\n",
      "| rlxall   | slpnaps + personal activs      |\n",
      "| selfe    | =1 if self employed            |\n",
      "| sleep    | mins sleep at night, per wk    |\n",
      "| slpnaps  | minutes sleep, inc. naps       |\n",
      "| south    | =1 if live in south            |\n",
      "| spsepay  | spousal wage income            |\n",
      "| spwrk75  | =1 if spouse works             |\n",
      "| totwrk   | mins worked per week           |\n",
      "| union    | =1 if belong to union          |\n",
      "| worknrm  | mins work main job             |\n",
      "| workscnd | mins work second job           |\n",
      "| exper    | age - educ - 6                 |\n",
      "| yngkid   | =1 if children < 3 present     |\n",
      "| yrsmarr  | years married                  |\n",
      "| hrwage   | hourly wage                    |\n",
      "| agesq    | age^2                          |\n",
      "+----------+--------------------------------+\n",
      "\n",
      "J.E. Biddle and D.S. Hamermesh (1990), “Sleep and the Allocation of\n",
      "Time,” Journal of Political Economy 98, 922-943. Professor Biddle\n",
      "kindly provided the data.\n"
     ]
    }
   ],
   "source": [
    "dataWoo(\"sleep75\", description=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}