{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. The Simple Regression Model\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" J.M. Wooldridge (2016) Introductory Econometrics: A Modern Approach,\n",
" Cengage Learning, 6th edition.\n",
"\n",
" 401k 401ksubs admnrev affairs airfare\n",
" alcohol apple approval athlet1 athlet2\n",
" attend audit barium beauty benefits\n",
" beveridge big9salary bwght bwght2 campus\n",
" card catholic cement census2000 ceosal1\n",
" ceosal2 charity consump corn countymurders\n",
" cps78_85 cps91 crime1 crime2 crime3\n",
" crime4 discrim driving earns econmath\n",
" elem94_95 engin expendshares ezanders ezunem\n",
" fair fertil1 fertil2 fertil3 fish\n",
" fringe gpa1 gpa2 gpa3 happiness\n",
" hprice1 hprice2 hprice3 hseinv htv\n",
" infmrt injury intdef intqrt inven\n",
" jtrain jtrain2 jtrain3 kielmc lawsch85\n",
" loanapp lowbrth mathpnl meap00_01 meap01\n",
" meap93 meapsingle minwage mlb1 mroz\n",
" murder nbasal nyse okun openness\n",
" pension phillips pntsprd prison prminwge\n",
" rdchem rdtelec recid rental return\n",
" saving sleep75 slp75_81 smoke traffic1\n",
" traffic2 twoyear volat vote1 vote2\n",
" voucher wage1 wage2 wagepan wageprc\n",
" wine\n"
]
}
],
"source": [
"from wooldridge import *\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"import numpy as np\n",
"\n",
"dataWoo()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.3 CEO Salary & Return on Equity"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: ceosal1\n",
"no of variables: 12\n",
"no of observations: 209\n",
"\n",
"+----------+-------------------------------+\n",
"| variable | label |\n",
"+----------+-------------------------------+\n",
"| salary | 1990 salary, thousands $ |\n",
"| pcsalary | % change salary, 89-90 |\n",
"| sales | 1990 firm sales, millions $ |\n",
"| roe | return on equity, 88-90 avg |\n",
"| pcroe | % change roe, 88-90 |\n",
"| ros | return on firm's stock, 88-90 |\n",
"| indus | =1 if industrial firm |\n",
"| finance | =1 if financial firm |\n",
"| consprod | =1 if consumer product firm |\n",
"| utility | =1 if transport. or utilties |\n",
"| lsalary | natural log of salary |\n",
"| lsales | natural log of sales |\n",
"+----------+-------------------------------+\n",
"\n",
"I took a random sample of data reported in the May 6, 1991 issue of\n",
"Businessweek.\n"
]
}
],
"source": [
"df = dataWoo('ceosal1')\n",
"dataWoo('ceosal1', description=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" salary | \n",
" pcsalary | \n",
" sales | \n",
" roe | \n",
" pcroe | \n",
" ros | \n",
" indus | \n",
" finance | \n",
" consprod | \n",
" utility | \n",
" lsalary | \n",
" lsales | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1095 | \n",
" 20 | \n",
" 27595.000000 | \n",
" 14.1 | \n",
" 106.400002 | \n",
" 191 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 6.998509 | \n",
" 10.225389 | \n",
"
\n",
" \n",
" 1 | \n",
" 1001 | \n",
" 32 | \n",
" 9958.000000 | \n",
" 10.9 | \n",
" -30.600000 | \n",
" 13 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 6.908755 | \n",
" 9.206132 | \n",
"
\n",
" \n",
" 2 | \n",
" 1122 | \n",
" 9 | \n",
" 6125.899902 | \n",
" 23.5 | \n",
" -16.299999 | \n",
" 14 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 7.022868 | \n",
" 8.720281 | \n",
"
\n",
" \n",
" 3 | \n",
" 578 | \n",
" -9 | \n",
" 16246.000000 | \n",
" 5.9 | \n",
" -25.700001 | \n",
" -21 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 6.359574 | \n",
" 9.695602 | \n",
"
\n",
" \n",
" 4 | \n",
" 1368 | \n",
" 7 | \n",
" 21783.199219 | \n",
" 13.8 | \n",
" -3.000000 | \n",
" 56 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 7.221105 | \n",
" 9.988894 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" salary pcsalary sales roe pcroe ros indus finance \\\n",
"0 1095 20 27595.000000 14.1 106.400002 191 1 0 \n",
"1 1001 32 9958.000000 10.9 -30.600000 13 1 0 \n",
"2 1122 9 6125.899902 23.5 -16.299999 14 1 0 \n",
"3 578 -9 16246.000000 5.9 -25.700001 -21 1 0 \n",
"4 1368 7 21783.199219 13.8 -3.000000 56 1 0 \n",
"\n",
" consprod utility lsalary lsales \n",
"0 0 0 6.998509 10.225389 \n",
"1 0 0 6.908755 9.206132 \n",
"2 0 0 7.022868 8.720281 \n",
"3 0 0 6.359574 9.695602 \n",
"4 0 0 7.221105 9.988894 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | salary | R-squared: | 0.013 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.008 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 2.767 | \n",
"
\n",
"\n",
" Date: | Tue, 09 Jul 2024 | Prob (F-statistic): | 0.0978 | \n",
"
\n",
"\n",
" Time: | 21:56:56 | Log-Likelihood: | -1804.5 | \n",
"
\n",
"\n",
" No. Observations: | 209 | AIC: | 3613. | \n",
"
\n",
"\n",
" Df Residuals: | 207 | BIC: | 3620. | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 963.1913 | 213.240 | 4.517 | 0.000 | 542.790 | 1383.592 | \n",
"
\n",
"\n",
" roe | 18.5012 | 11.123 | 1.663 | 0.098 | -3.428 | 40.431 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 311.096 | Durbin-Watson: | 2.105 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 31120.902 | \n",
"
\n",
"\n",
" Skew: | 6.915 | Prob(JB): | 0.00 | \n",
"
\n",
"\n",
" Kurtosis: | 61.158 | Cond. No. | 43.3 | \n",
"
\n",
"
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/latex": [
"\\begin{center}\n",
"\\begin{tabular}{lclc}\n",
"\\toprule\n",
"\\textbf{Dep. Variable:} & salary & \\textbf{ R-squared: } & 0.013 \\\\\n",
"\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.008 \\\\\n",
"\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 2.767 \\\\\n",
"\\textbf{Date:} & Tue, 09 Jul 2024 & \\textbf{ Prob (F-statistic):} & 0.0978 \\\\\n",
"\\textbf{Time:} & 21:56:56 & \\textbf{ Log-Likelihood: } & -1804.5 \\\\\n",
"\\textbf{No. Observations:} & 209 & \\textbf{ AIC: } & 3613. \\\\\n",
"\\textbf{Df Residuals:} & 207 & \\textbf{ BIC: } & 3620. \\\\\n",
"\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n",
"\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\\begin{tabular}{lcccccc}\n",
" & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n",
"\\midrule\n",
"\\textbf{Intercept} & 963.1913 & 213.240 & 4.517 & 0.000 & 542.790 & 1383.592 \\\\\n",
"\\textbf{roe} & 18.5012 & 11.123 & 1.663 & 0.098 & -3.428 & 40.431 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\\begin{tabular}{lclc}\n",
"\\textbf{Omnibus:} & 311.096 & \\textbf{ Durbin-Watson: } & 2.105 \\\\\n",
"\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 31120.902 \\\\\n",
"\\textbf{Skew:} & 6.915 & \\textbf{ Prob(JB): } & 0.00 \\\\\n",
"\\textbf{Kurtosis:} & 61.158 & \\textbf{ Cond. No. } & 43.3 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"%\\caption{OLS Regression Results}\n",
"\\end{center}\n",
"\n",
"Notes: \\newline\n",
" [1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: salary R-squared: 0.013\n",
"Model: OLS Adj. R-squared: 0.008\n",
"Method: Least Squares F-statistic: 2.767\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 0.0978\n",
"Time: 21:56:56 Log-Likelihood: -1804.5\n",
"No. Observations: 209 AIC: 3613.\n",
"Df Residuals: 207 BIC: 3620.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 963.1913 213.240 4.517 0.000 542.790 1383.592\n",
"roe 18.5012 11.123 1.663 0.098 -3.428 40.431\n",
"==============================================================================\n",
"Omnibus: 311.096 Durbin-Watson: 2.105\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 31120.902\n",
"Skew: 6.915 Prob(JB): 0.00\n",
"Kurtosis: 61.158 Cond. No. 43.3\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = smf.ols(formula='salary ~ 1 + roe', data=df).fit()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: salary R-squared: 0.013\n",
"Model: OLS Adj. R-squared: 0.008\n",
"Method: Least Squares F-statistic: 2.767\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 0.0978\n",
"Time: 21:56:56 Log-Likelihood: -1804.5\n",
"No. Observations: 209 AIC: 3613.\n",
"Df Residuals: 207 BIC: 3620.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 963.1913 213.240 4.517 0.000 542.790 1383.592\n",
"roe 18.5012 11.123 1.663 0.098 -3.428 40.431\n",
"==============================================================================\n",
"Omnibus: 311.096 Durbin-Watson: 2.105\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 31120.902\n",
"Skew: 6.915 Prob(JB): 0.00\n",
"Kurtosis: 61.158 Cond. No. 43.3\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Intercept 963.191336\n",
"roe 18.501186\n",
"dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.params"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"209.0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.nobs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the return on equity increases by one percentage point, salary is predicted to increase by about 18.5,\n",
"i.e. roughly 18,500 dollars, because `salary` is measured in thousands of dollars."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1518.226927\n",
"dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted_salary = model.predict(pd.DataFrame({'roe': [30]}))\n",
"predicted_salary"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.4 Wage Equation"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: wage1\n",
"no of variables: 24\n",
"no of observations: 526\n",
"\n",
"+----------+---------------------------------+\n",
"| variable | label |\n",
"+----------+---------------------------------+\n",
"| wage | average hourly earnings |\n",
"| educ | years of education |\n",
"| exper | years potential experience |\n",
"| tenure | years with current employer |\n",
"| nonwhite | =1 if nonwhite |\n",
"| female | =1 if female |\n",
"| married | =1 if married |\n",
"| numdep | number of dependents |\n",
"| smsa | =1 if live in SMSA |\n",
"| northcen | =1 if live in north central U.S |\n",
"| south | =1 if live in southern region |\n",
"| west | =1 if live in western region |\n",
"| construc | =1 if work in construc. indus. |\n",
"| ndurman | =1 if in nondur. manuf. indus. |\n",
"| trcommpu | =1 if in trans, commun, pub ut |\n",
"| trade | =1 if in wholesale or retail |\n",
"| services | =1 if in services indus. |\n",
"| profserv | =1 if in prof. serv. indus. |\n",
"| profocc | =1 if in profess. occupation |\n",
"| clerocc | =1 if in clerical occupation |\n",
"| servocc | =1 if in service occupation |\n",
"| lwage | log(wage) |\n",
"| expersq | exper^2 |\n",
"| tenursq | tenure^2 |\n",
"+----------+---------------------------------+\n",
"\n",
"These are data from the 1976 Current Population Survey, collected by\n",
"Henry Farber when he and I were colleagues at MIT in 1988.\n"
]
}
],
"source": [
"df2 = dataWoo('wage1')\n",
"dataWoo('wage1', description=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" wage | \n",
" educ | \n",
" exper | \n",
" tenure | \n",
" nonwhite | \n",
" female | \n",
" married | \n",
" numdep | \n",
" smsa | \n",
" northcen | \n",
" ... | \n",
" trcommpu | \n",
" trade | \n",
" services | \n",
" profserv | \n",
" profocc | \n",
" clerocc | \n",
" servocc | \n",
" lwage | \n",
" expersq | \n",
" tenursq | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3.10 | \n",
" 11 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1.131402 | \n",
" 4 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 3.24 | \n",
" 12 | \n",
" 22 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1.175573 | \n",
" 484 | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.00 | \n",
" 11 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1.098612 | \n",
" 4 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 6.00 | \n",
" 8 | \n",
" 44 | \n",
" 28 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1.791759 | \n",
" 1936 | \n",
" 784 | \n",
"
\n",
" \n",
" 4 | \n",
" 5.30 | \n",
" 12 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1.667707 | \n",
" 49 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" wage educ exper tenure nonwhite female married numdep smsa \\\n",
"0 3.10 11 2 0 0 1 0 2 1 \n",
"1 3.24 12 22 2 0 1 1 3 1 \n",
"2 3.00 11 2 0 0 0 0 2 0 \n",
"3 6.00 8 44 28 0 0 1 0 1 \n",
"4 5.30 12 7 2 0 0 1 1 0 \n",
"\n",
" northcen ... trcommpu trade services profserv profocc clerocc \\\n",
"0 0 ... 0 0 0 0 0 0 \n",
"1 0 ... 0 0 1 0 0 0 \n",
"2 0 ... 0 1 0 0 0 0 \n",
"3 0 ... 0 0 0 0 0 1 \n",
"4 0 ... 0 0 0 0 0 0 \n",
"\n",
" servocc lwage expersq tenursq \n",
"0 0 1.131402 4 0 \n",
"1 1 1.175573 484 4 \n",
"2 0 1.098612 4 0 \n",
"3 0 1.791759 1936 784 \n",
"4 0 1.667707 49 4 \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: wage R-squared: 0.165\n",
"Model: OLS Adj. R-squared: 0.163\n",
"Method: Least Squares F-statistic: 103.4\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 2.78e-22\n",
"Time: 21:56:56 Log-Likelihood: -1385.7\n",
"No. Observations: 526 AIC: 2775.\n",
"Df Residuals: 524 BIC: 2784.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept -0.9049 0.685 -1.321 0.187 -2.250 0.441\n",
"educ 0.5414 0.053 10.167 0.000 0.437 0.646\n",
"==============================================================================\n",
"Omnibus: 212.554 Durbin-Watson: 1.824\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 807.843\n",
"Skew: 1.861 Prob(JB): 3.79e-176\n",
"Kurtosis: 7.797 Cond. No. 60.2\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"model2 = smf.ols(formula='wage ~ educ', data=df2).fit()\n",
"print(model2.summary())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The intercept of −0.90 literally means that a person\n",
"with no education has a predicted hourly wage of −90¢ an hour"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each additional year of education is predicted to increase the hourly wage by\n",
"about 54¢ (the estimated `educ` coefficient is 0.5414)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.5 Vote share"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: vote1\n",
"no of variables: 10\n",
"no of observations: 173\n",
"\n",
"+----------+---------------------------------+\n",
"| variable | label |\n",
"+----------+---------------------------------+\n",
"| state | state postal code |\n",
"| district | congressional district |\n",
"| democA | =1 if A is democrat |\n",
"| voteA | percent vote for A |\n",
"| expendA | camp. expends. by A, $1000s |\n",
"| expendB | camp. expends. by B, $1000s |\n",
"| prtystrA | % vote for president |\n",
"| lexpendA | log(expendA) |\n",
"| lexpendB | log(expendB) |\n",
"| shareA | 100*(expendA/(expendA+expendB)) |\n",
"+----------+---------------------------------+\n",
"\n",
"From M. Barone and G. Ujifusa, The Almanac of American Politics, 1992.\n",
"Washington, DC: National Journal.\n"
]
}
],
"source": [
"df3 = dataWoo('vote1')\n",
"dataWoo('vote1', description=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" district | \n",
" democA | \n",
" voteA | \n",
" expendA | \n",
" expendB | \n",
" prtystrA | \n",
" lexpendA | \n",
" lexpendB | \n",
" shareA | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AL | \n",
" 7 | \n",
" 1 | \n",
" 68 | \n",
" 328.295990 | \n",
" 8.737000 | \n",
" 41 | \n",
" 5.793916 | \n",
" 2.167567 | \n",
" 97.407669 | \n",
"
\n",
" \n",
" 1 | \n",
" AK | \n",
" 1 | \n",
" 0 | \n",
" 62 | \n",
" 626.377014 | \n",
" 402.476990 | \n",
" 60 | \n",
" 6.439952 | \n",
" 5.997638 | \n",
" 60.881039 | \n",
"
\n",
" \n",
" 2 | \n",
" AZ | \n",
" 2 | \n",
" 1 | \n",
" 73 | \n",
" 99.607002 | \n",
" 3.065000 | \n",
" 55 | \n",
" 4.601233 | \n",
" 1.120048 | \n",
" 97.014763 | \n",
"
\n",
" \n",
" 3 | \n",
" AZ | \n",
" 3 | \n",
" 0 | \n",
" 69 | \n",
" 319.690002 | \n",
" 26.281000 | \n",
" 64 | \n",
" 5.767352 | \n",
" 3.268846 | \n",
" 92.403702 | \n",
"
\n",
" \n",
" 4 | \n",
" AR | \n",
" 3 | \n",
" 0 | \n",
" 75 | \n",
" 159.220993 | \n",
" 60.054001 | \n",
" 66 | \n",
" 5.070293 | \n",
" 4.095244 | \n",
" 72.612473 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state district democA voteA expendA expendB prtystrA lexpendA \\\n",
"0 AL 7 1 68 328.295990 8.737000 41 5.793916 \n",
"1 AK 1 0 62 626.377014 402.476990 60 6.439952 \n",
"2 AZ 2 1 73 99.607002 3.065000 55 4.601233 \n",
"3 AZ 3 0 69 319.690002 26.281000 64 5.767352 \n",
"4 AR 3 0 75 159.220993 60.054001 66 5.070293 \n",
"\n",
" lexpendB shareA \n",
"0 2.167567 97.407669 \n",
"1 5.997638 60.881039 \n",
"2 1.120048 97.014763 \n",
"3 3.268846 92.403702 \n",
"4 4.095244 72.612473 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: voteA R-squared: 0.856\n",
"Model: OLS Adj. R-squared: 0.855\n",
"Method: Least Squares F-statistic: 1018.\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 6.63e-74\n",
"Time: 21:56:56 Log-Likelihood: -565.20\n",
"No. Observations: 173 AIC: 1134.\n",
"Df Residuals: 171 BIC: 1141.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 26.8122 0.887 30.221 0.000 25.061 28.564\n",
"shareA 0.4638 0.015 31.901 0.000 0.435 0.493\n",
"==============================================================================\n",
"Omnibus: 20.747 Durbin-Watson: 1.826\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 44.613\n",
"Skew: 0.525 Prob(JB): 2.05e-10\n",
"Kurtosis: 5.255 Cond. No. 112.\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"model3 = smf.ols(formula='voteA ~ shareA', data=df3).fit()\n",
"print(model3.summary())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If Candidate A’s share of spending increases by one percentage point, Candidate A\n",
"is predicted to receive almost one-half of a percentage point (0.464) more of the total vote."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.6 Table 2.2"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"df['salary_hat'] = model.fittedvalues\n",
"df['uhat'] = model.resid"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" roe | \n",
" salary | \n",
" salary_hat | \n",
" uhat | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 14.100000 | \n",
" 1095 | \n",
" 1224.058071 | \n",
" -129.058071 | \n",
"
\n",
" \n",
" 1 | \n",
" 10.900000 | \n",
" 1001 | \n",
" 1164.854261 | \n",
" -163.854261 | \n",
"
\n",
" \n",
" 2 | \n",
" 23.500000 | \n",
" 1122 | \n",
" 1397.969216 | \n",
" -275.969216 | \n",
"
\n",
" \n",
" 3 | \n",
" 5.900000 | \n",
" 578 | \n",
" 1072.348338 | \n",
" -494.348338 | \n",
"
\n",
" \n",
" 4 | \n",
" 13.800000 | \n",
" 1368 | \n",
" 1218.507712 | \n",
" 149.492288 | \n",
"
\n",
" \n",
" 5 | \n",
" 20.000000 | \n",
" 1145 | \n",
" 1333.215063 | \n",
" -188.215063 | \n",
"
\n",
" \n",
" 6 | \n",
" 16.400000 | \n",
" 1078 | \n",
" 1266.610785 | \n",
" -188.610785 | \n",
"
\n",
" \n",
" 7 | \n",
" 16.299999 | \n",
" 1094 | \n",
" 1264.760660 | \n",
" -170.760660 | \n",
"
\n",
" \n",
" 8 | \n",
" 10.500000 | \n",
" 1237 | \n",
" 1157.453793 | \n",
" 79.546207 | \n",
"
\n",
" \n",
" 9 | \n",
" 26.299999 | \n",
" 833 | \n",
" 1449.772523 | \n",
" -616.772523 | \n",
"
\n",
" \n",
" 10 | \n",
" 25.900000 | \n",
" 567 | \n",
" 1442.372056 | \n",
" -875.372056 | \n",
"
\n",
" \n",
" 11 | \n",
" 26.799999 | \n",
" 933 | \n",
" 1459.023116 | \n",
" -526.023116 | \n",
"
\n",
" \n",
" 12 | \n",
" 14.800000 | \n",
" 1339 | \n",
" 1237.008898 | \n",
" 101.991102 | \n",
"
\n",
" \n",
" 13 | \n",
" 22.299999 | \n",
" 937 | \n",
" 1375.767778 | \n",
" -438.767778 | \n",
"
\n",
" \n",
" 14 | \n",
" 56.299999 | \n",
" 2011 | \n",
" 2004.808114 | \n",
" 6.191886 | \n",
"
\n",
" \n",
" 15 | \n",
" 12.600000 | \n",
" 1585 | \n",
" 1196.306291 | \n",
" 388.693709 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" roe salary salary_hat uhat\n",
"0 14.100000 1095 1224.058071 -129.058071\n",
"1 10.900000 1001 1164.854261 -163.854261\n",
"2 23.500000 1122 1397.969216 -275.969216\n",
"3 5.900000 578 1072.348338 -494.348338\n",
"4 13.800000 1368 1218.507712 149.492288\n",
"5 20.000000 1145 1333.215063 -188.215063\n",
"6 16.400000 1078 1266.610785 -188.610785\n",
"7 16.299999 1094 1264.760660 -170.760660\n",
"8 10.500000 1237 1157.453793 79.546207\n",
"9 26.299999 833 1449.772523 -616.772523\n",
"10 25.900000 567 1442.372056 -875.372056\n",
"11 26.799999 933 1459.023116 -526.023116\n",
"12 14.800000 1339 1237.008898 101.991102\n",
"13 22.299999 937 1375.767778 -438.767778\n",
"14 56.299999 2011 2004.808114 6.191886\n",
"15 12.600000 1585 1196.306291 388.693709"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['roe','salary','salary_hat','uhat']].head(16)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The first four CEOs have lower salaries than what we predicted from the OLS regression\n",
"line fitted in Example 2.3 (equation 2.26 in the textbook); in other words, given only the firm’s roe, these CEOs make less than what we\n",
"predicted. As can be seen from the positive `uhat`, the fifth CEO makes more than predicted from\n",
"the OLS regression line."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.7 Wage & education"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(5.896102674787035)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2['wage'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(12.562737642585551)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2['educ'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: wage R-squared: 0.165\n",
"Model: OLS Adj. R-squared: 0.163\n",
"Method: Least Squares F-statistic: 103.4\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 2.78e-22\n",
"Time: 21:56:56 Log-Likelihood: -1385.7\n",
"No. Observations: 526 AIC: 2775.\n",
"Df Residuals: 524 BIC: 2784.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept -0.9049 0.685 -1.321 0.187 -2.250 0.441\n",
"educ 0.5414 0.053 10.167 0.000 0.437 0.646\n",
"==============================================================================\n",
"Omnibus: 212.554 Durbin-Watson: 1.824\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 807.843\n",
"Skew: 1.861 Prob(JB): 3.79e-176\n",
"Kurtosis: 7.797 Cond. No. 60.2\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(model2.summary())"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 5.894621\n",
"dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model2.predict(pd.DataFrame({'educ': [12.56]}))"
]
},
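{
"cell_type": "markdown",
"metadata": {},
"source": [
"The point $(\\bar{x}, \\bar{y})$ falls on the OLS regression line: plugging the average years of education\n",
"(about 12.56) into the fitted equation gives a predicted wage of about 5.89, which equals the average wage (5.896)\n",
"up to the rounding of 12.56."
]
},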
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.8. CEO Salary - R-squared"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: salary R-squared: 0.013\n",
"Model: OLS Adj. R-squared: 0.008\n",
"Method: Least Squares F-statistic: 2.767\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 0.0978\n",
"Time: 21:56:56 Log-Likelihood: -1804.5\n",
"No. Observations: 209 AIC: 3613.\n",
"Df Residuals: 207 BIC: 3620.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 963.1913 213.240 4.517 0.000 542.790 1383.592\n",
"roe 18.5012 11.123 1.663 0.098 -3.428 40.431\n",
"==============================================================================\n",
"Omnibus: 311.096 Durbin-Watson: 2.105\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 31120.902\n",
"Skew: 6.915 Prob(JB): 0.00\n",
"Kurtosis: 61.158 Cond. No. 43.3\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(0.01318862408103405)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.rsquared"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The firm’s return on equity explains only about 1.3% of the variation in salaries for this sample of 209 CEOs. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2.9 Voting outcome - R-squared"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: voteA R-squared: 0.856\n",
"Model: OLS Adj. R-squared: 0.855\n",
"Method: Least Squares F-statistic: 1018.\n",
"Date: Tue, 09 Jul 2024 Prob (F-statistic): 6.63e-74\n",
"Time: 21:56:56 Log-Likelihood: -565.20\n",
"No. Observations: 173 AIC: 1134.\n",
"Df Residuals: 171 BIC: 1141.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 26.8122 0.887 30.221 0.000 25.061 28.564\n",
"shareA 0.4638 0.015 31.901 0.000 0.435 0.493\n",
"==============================================================================\n",
"Omnibus: 20.747 Durbin-Watson: 1.826\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 44.613\n",
"Skew: 0.525 Prob(JB): 2.05e-10\n",
"Kurtosis: 5.255 Cond. No. 112.\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(model3.summary())"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(0.8561408655827665)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model3.rsquared"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The share of campaign expenditures explains over 85% of the variation in the election outcomes for this sample (R-squared = 0.856)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exercises"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### C1\n",
"The data in 401K are a subset of data analyzed by Papke (1995) to study the relationship between\n",
"participation in a 401(k) pension plan and the generosity of the plan. The variable prate is the percentage of eligible workers with an active account; this is the variable we would like to explain. The\n",
"measure of generosity is the plan match rate, mrate. This variable gives the average amount the firm\n",
"contributes to each worker’s plan for each \\$1 contribution by the worker. For example, if mrate = 0.50, then a \\$1 contribution by the worker is matched by a 50¢ contribution by the firm."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prate | \n",
" mrate | \n",
" totpart | \n",
" totelg | \n",
" age | \n",
" totemp | \n",
" sole | \n",
" ltotemp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 26.100000 | \n",
" 0.21 | \n",
" 1653.0 | \n",
" 6322.0 | \n",
" 8 | \n",
" 8709.0 | \n",
" 0 | \n",
" 9.072112 | \n",
"
\n",
" \n",
" 1 | \n",
" 100.000000 | \n",
" 1.42 | \n",
" 262.0 | \n",
" 262.0 | \n",
" 6 | \n",
" 315.0 | \n",
" 1 | \n",
" 5.752573 | \n",
"
\n",
" \n",
" 2 | \n",
" 97.599998 | \n",
" 0.91 | \n",
" 166.0 | \n",
" 170.0 | \n",
" 10 | \n",
" 275.0 | \n",
" 1 | \n",
" 5.616771 | \n",
"
\n",
" \n",
" 3 | \n",
" 100.000000 | \n",
" 0.42 | \n",
" 257.0 | \n",
" 257.0 | \n",
" 7 | \n",
" 500.0 | \n",
" 0 | \n",
" 6.214608 | \n",
"
\n",
" \n",
" 4 | \n",
" 82.500000 | \n",
" 0.53 | \n",
" 591.0 | \n",
" 716.0 | \n",
" 28 | \n",
" 933.0 | \n",
" 1 | \n",
" 6.838405 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prate mrate totpart totelg age totemp sole ltotemp\n",
"0 26.100000 0.21 1653.0 6322.0 8 8709.0 0 9.072112\n",
"1 100.000000 1.42 262.0 262.0 6 315.0 1 5.752573\n",
"2 97.599998 0.91 166.0 170.0 10 275.0 1 5.616771\n",
"3 100.000000 0.42 257.0 257.0 7 500.0 0 6.214608\n",
"4 82.500000 0.53 591.0 716.0 28 933.0 1 6.838405"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = dataWoo('401K')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: 401k\n",
"no of variables: 8\n",
"no of observations: 1534\n",
"\n",
"+----------+---------------------------------+\n",
"| variable | label |\n",
"+----------+---------------------------------+\n",
"| prate | participation rate, percent |\n",
"| mrate | 401k plan match rate |\n",
"| totpart | total 401k participants |\n",
"| totelg | total eligible for 401k plan |\n",
"| age | age of 401k plan |\n",
"| totemp | total number of firm employees |\n",
"| sole | = 1 if 401k is firm's sole plan |\n",
"| ltotemp | log of totemp |\n",
"+----------+---------------------------------+\n",
"\n",
"L.E. Papke (1995), “Participation in and Contributions to 401(k)\n",
"Pension Plans:Evidence from Plan Data,” Journal of Human Resources 30,\n",
"311-325. Professor Papke kindly provided these data. She gathered them\n",
"from the Internal Revenue Service’s Form 5500 tapes.\n"
]
}
],
"source": [
"dataWoo('401K', description=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(i) Find the average participation rate and the average match rate in the sample of plans."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average participation rate: 87.36\n",
"Average match rate: 0.73\n"
]
}
],
"source": [
"print(\"Average participation rate:\", round(df['prate'].mean(), 2))\n",
"print(\"Average match rate:\", round(df['mrate'].mean(), 2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(ii) Now, estimate the simple regression equation\n",
"$$\n",
"\\widehat{prate} = \\hat{\\beta}_0 + \\hat{\\beta}_1 mrate\n",
"$$\n",
"\n",
"and report the results along with the sample size and R-squared."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"results: Intercept 83.075455\n",
"mrate 5.861079\n",
"dtype: float64\n",
"R squared: 0.075\n",
"Sample size: 1534.0\n"
]
}
],
"source": [
"# Simple OLS of the participation rate (percent) on the plan match rate.\n",
"prate_hat = smf.ols(\"prate ~ 1 + mrate\", data=df).fit()\n",
"\n",
"print(\"results:\", prate_hat.params)\n",
"\n",
"# Round with the round() builtin rather than calling the __round__ dunder directly.\n",
"print(\"R squared:\", round(prate_hat.rsquared, 3))\n",
"\n",
"# nobs is reported as a float, hence the trailing .0 in the printed sample size.\n",
"print(\"Sample size:\", prate_hat.nobs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(iii) Interpret the intercept in your equation. Interpret the coefficient on mrate."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"intercept: 83.08\n"
]
}
],
"source": [
"# Intercept: a plan with mrate = 0 has a predicted participation rate of about 83.08 percent.\n",
"# Slope: a one-unit increase in mrate (one more dollar of match per dollar contributed)\n",
"# raises the predicted prate by about 5.86 percentage points.\n",
"# The intercept is selected by its label rather than by position and rounded with round().\n",
"print('intercept:', round(prate_hat.params['Intercept'], 2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(iv) Find the predicted prate when mrate = 3.5. Is this a reasonable prediction? Explain what is\n",
"happening here."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 103.59\n",
"dtype: float64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The fitted line gives about 103.6 at mrate = 3.5, which is not a feasible value\n",
"# because prate is a percentage and cannot exceed 100.\n",
"prate_hat.predict({'mrate': 3.5}).round(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(v) How much of the variation in prate is explained by mrate? Is this a lot in your opinion?"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percentage explained: 7.5\n"
]
}
],
"source": [
"# R-squared as a percentage: mrate accounts for only a small share of the variation in prate.\n",
"print(\"Percentage explained:\", round(100 * prate_hat.rsquared, 1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### C2\n",
"The data set in CEOSAL2 contains information on chief executive officers for U.S. corporations. The\n",
"variable salary is annual compensation, in thousands of dollars, and ceoten is prior number of years as\n",
"company CEO."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" salary | \n",
" age | \n",
" college | \n",
" grad | \n",
" comten | \n",
" ceoten | \n",
" sales | \n",
" profits | \n",
" mktval | \n",
" lsalary | \n",
" lsales | \n",
" lmktval | \n",
" comtensq | \n",
" ceotensq | \n",
" profmarg | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1161 | \n",
" 49 | \n",
" 1 | \n",
" 1 | \n",
" 9 | \n",
" 2 | \n",
" 6200.0 | \n",
" 966 | \n",
" 23200.0 | \n",
" 7.057037 | \n",
" 8.732305 | \n",
" 10.051908 | \n",
" 81 | \n",
" 4 | \n",
" 15.580646 | \n",
"
\n",
" \n",
" 1 | \n",
" 600 | \n",
" 43 | \n",
" 1 | \n",
" 1 | \n",
" 10 | \n",
" 10 | \n",
" 283.0 | \n",
" 48 | \n",
" 1100.0 | \n",
" 6.396930 | \n",
" 5.645447 | \n",
" 7.003066 | \n",
" 100 | \n",
" 100 | \n",
" 16.961130 | \n",
"
\n",
" \n",
" 2 | \n",
" 379 | \n",
" 51 | \n",
" 1 | \n",
" 1 | \n",
" 9 | \n",
" 3 | \n",
" 169.0 | \n",
" 40 | \n",
" 1100.0 | \n",
" 5.937536 | \n",
" 5.129899 | \n",
" 7.003066 | \n",
" 81 | \n",
" 9 | \n",
" 23.668638 | \n",
"
\n",
" \n",
" 3 | \n",
" 651 | \n",
" 55 | \n",
" 1 | \n",
" 0 | \n",
" 22 | \n",
" 22 | \n",
" 1100.0 | \n",
" -54 | \n",
" 1000.0 | \n",
" 6.478509 | \n",
" 7.003066 | \n",
" 6.907755 | \n",
" 484 | \n",
" 484 | \n",
" -4.909091 | \n",
"
\n",
" \n",
" 4 | \n",
" 497 | \n",
" 44 | \n",
" 1 | \n",
" 1 | \n",
" 8 | \n",
" 6 | \n",
" 351.0 | \n",
" 28 | \n",
" 387.0 | \n",
" 6.208590 | \n",
" 5.860786 | \n",
" 5.958425 | \n",
" 64 | \n",
" 36 | \n",
" 7.977208 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" salary age college grad comten ceoten sales profits mktval \\\n",
"0 1161 49 1 1 9 2 6200.0 966 23200.0 \n",
"1 600 43 1 1 10 10 283.0 48 1100.0 \n",
"2 379 51 1 1 9 3 169.0 40 1100.0 \n",
"3 651 55 1 0 22 22 1100.0 -54 1000.0 \n",
"4 497 44 1 1 8 6 351.0 28 387.0 \n",
"\n",
" lsalary lsales lmktval comtensq ceotensq profmarg \n",
"0 7.057037 8.732305 10.051908 81 4 15.580646 \n",
"1 6.396930 5.645447 7.003066 100 100 16.961130 \n",
"2 5.937536 5.129899 7.003066 81 9 23.668638 \n",
"3 6.478509 7.003066 6.907755 484 484 -4.909091 \n",
"4 6.208590 5.860786 5.958425 64 36 7.977208 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = dataWoo(\"CEOSAL2\")\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: ceosal2\n",
"no of variables: 15\n",
"no of observations: 177\n",
"\n",
"+----------+--------------------------------+\n",
"| variable | label |\n",
"+----------+--------------------------------+\n",
"| salary | 1990 compensation, $1000s |\n",
"| age | in years |\n",
"| college | =1 if attended college |\n",
"| grad | =1 if attended graduate school |\n",
"| comten | years with company |\n",
"| ceoten | years as ceo with company |\n",
"| sales | 1990 firm sales, millions |\n",
"| profits | 1990 profits, millions |\n",
"| mktval | market value, end 1990, mills. |\n",
"| lsalary | log(salary) |\n",
"| lsales | log(sales) |\n",
"| lmktval | log(mktval) |\n",
"| comtensq | comten^2 |\n",
"| ceotensq | ceoten^2 |\n",
"| profmarg | profits as % of sales |\n",
"+----------+--------------------------------+\n",
"\n",
"See CEOSAL1.RAW\n"
]
}
],
"source": [
"dataWoo(\"CEOSAL2\", description=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(i) Find the average salary and the average tenure in the sample."
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average Salary: 865.864\n",
"Average ceoten 7.95\n"
]
}
],
"source": [
"print(\"Average Salary:\", round(df2['salary'].mean(), 3))\n",
"print(\"Average ceoten\", round(df2[\"ceoten\"].mean(), 2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)? What is the longest tenure\n",
"as a CEO?"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of first year CEO: 5\n",
"Longest Tenure: 37\n"
]
}
],
"source": [
"print(\"Number of first year CEO:\", (df2['ceoten'] == 0).sum())\n",
"print(\"Longest Tenure:\", df2[\"ceoten\"].max())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(iii) Estimate the simple regression model \n",
"$$\n",
"\\log(salary) = \\beta_0 + \\beta_1 ceoten + u,\n",
"$$\n",
"and report your results in the usual form. What is the (approximate) predicted percentage\n",
"increase in salary given one more year as a CEO?"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Paramters:\n",
"Intercept 6.505498\n",
"ceoten 0.009724\n",
"dtype: float64\n",
"Percentage increase: 0.97\n"
]
}
],
"source": [
"# Log-level model: np.log() is applied to salary inside the formula.\n",
"log_salary_hat = smf.ols(\"np.log(salary) ~ 1 + ceoten\", data=df2).fit()\n",
"\n",
"print(\"Paramters:\\n\", log_salary_hat.params, sep='')\n",
"\n",
"# In a log-level model, 100 * slope approximates the percentage change in salary\n",
"# for one more year as CEO. The slope is looked up by its label so the result\n",
"# does not depend on the position of the term in the params Series.\n",
"print(\"Percentage increase:\", round(log_salary_hat.params[\"ceoten\"] * 100, 2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### C3 \n",
"Use the data in SLEEP75 from Biddle and Hamermesh (1990) to study whether there is a tradeoff\n",
"between the time spent sleeping per week and the time spent in paid work. We could use either variable\n",
"as the dependent variable. For concreteness, \n",
"estimate the model \n",
"$$\n",
"sleep = \\beta_0 + \\beta_1 totwrk + u,\n",
"$$\n",
"where sleep is minutes spent sleeping at night per week and totwrk is total minutes worked during the week."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" age | \n",
" black | \n",
" case | \n",
" clerical | \n",
" construc | \n",
" educ | \n",
" earns74 | \n",
" gdhlth | \n",
" inlf | \n",
" leis1 | \n",
" ... | \n",
" spwrk75 | \n",
" totwrk | \n",
" union | \n",
" worknrm | \n",
" workscnd | \n",
" exper | \n",
" yngkid | \n",
" yrsmarr | \n",
" hrwage | \n",
" agesq | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 32 | \n",
" 0 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 12 | \n",
" 0.0 | \n",
" 0 | \n",
" 1 | \n",
" 3529 | \n",
" ... | \n",
" 0 | \n",
" 3438 | \n",
" 0 | \n",
" 3438 | \n",
" 0 | \n",
" 14 | \n",
" 0 | \n",
" 13 | \n",
" 7.070004 | \n",
" 1024 | \n",
"
\n",
" \n",
" 1 | \n",
" 31 | \n",
" 0 | \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 14 | \n",
" 9500.0 | \n",
" 1 | \n",
" 1 | \n",
" 2140 | \n",
" ... | \n",
" 0 | \n",
" 5020 | \n",
" 0 | \n",
" 5020 | \n",
" 0 | \n",
" 11 | \n",
" 0 | \n",
" 0 | \n",
" 1.429999 | \n",
" 961 | \n",
"
\n",
" \n",
" 2 | \n",
" 44 | \n",
" 0 | \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 17 | \n",
" 42500.0 | \n",
" 1 | \n",
" 1 | \n",
" 4595 | \n",
" ... | \n",
" 1 | \n",
" 2815 | \n",
" 0 | \n",
" 2815 | \n",
" 0 | \n",
" 21 | \n",
" 0 | \n",
" 0 | \n",
" 20.529997 | \n",
" 1936 | \n",
"
\n",
" \n",
" 3 | \n",
" 30 | \n",
" 0 | \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 12 | \n",
" 42500.0 | \n",
" 1 | \n",
" 1 | \n",
" 3211 | \n",
" ... | \n",
" 1 | \n",
" 3786 | \n",
" 0 | \n",
" 3786 | \n",
" 0 | \n",
" 12 | \n",
" 0 | \n",
" 12 | \n",
" 9.619998 | \n",
" 900 | \n",
"
\n",
" \n",
" 4 | \n",
" 64 | \n",
" 0 | \n",
" 5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 14 | \n",
" 2500.0 | \n",
" 1 | \n",
" 1 | \n",
" 4052 | \n",
" ... | \n",
" 1 | \n",
" 2580 | \n",
" 0 | \n",
" 2580 | \n",
" 0 | \n",
" 44 | \n",
" 0 | \n",
" 33 | \n",
" 2.750000 | \n",
" 4096 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 34 columns
\n",
"
"
],
"text/plain": [
" age black case clerical construc educ earns74 gdhlth inlf leis1 \\\n",
"0 32 0 1 0.0 0.0 12 0.0 0 1 3529 \n",
"1 31 0 2 0.0 0.0 14 9500.0 1 1 2140 \n",
"2 44 0 3 0.0 0.0 17 42500.0 1 1 4595 \n",
"3 30 0 4 0.0 0.0 12 42500.0 1 1 3211 \n",
"4 64 0 5 0.0 0.0 14 2500.0 1 1 4052 \n",
"\n",
" ... spwrk75 totwrk union worknrm workscnd exper yngkid yrsmarr \\\n",
"0 ... 0 3438 0 3438 0 14 0 13 \n",
"1 ... 0 5020 0 5020 0 11 0 0 \n",
"2 ... 1 2815 0 2815 0 21 0 0 \n",
"3 ... 1 3786 0 3786 0 12 0 12 \n",
"4 ... 1 2580 0 2580 0 44 0 33 \n",
"\n",
" hrwage agesq \n",
"0 7.070004 1024 \n",
"1 1.429999 961 \n",
"2 20.529997 1936 \n",
"3 9.619998 900 \n",
"4 2.750000 4096 \n",
"\n",
"[5 rows x 34 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = dataWoo(\"sleep75\")\n",
"df3.head()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name of dataset: sleep75\n",
"no of variables: 34\n",
"no of observations: 706\n",
"\n",
"+----------+--------------------------------+\n",
"| variable | label |\n",
"+----------+--------------------------------+\n",
"| age | in years |\n",
"| black | =1 if black |\n",
"| case | identifier |\n",
"| clerical | =1 if clerical worker |\n",
"| construc | =1 if construction worker |\n",
"| educ | years of schooling |\n",
"| earns74 | total earnings, 1974 |\n",
"| gdhlth | =1 if in good or excel. health |\n",
"| inlf | =1 if in labor force |\n",
"| leis1 | sleep - totwrk |\n",
"| leis2 | slpnaps - totwrk |\n",
"| leis3 | rlxall - totwrk |\n",
"| smsa | =1 if live in smsa |\n",
"| lhrwage | log hourly wage |\n",
"| lothinc | log othinc, unless othinc < 0 |\n",
"| male | =1 if male |\n",
"| marr | =1 if married |\n",
"| prot | =1 if Protestant |\n",
"| rlxall | slpnaps + personal activs |\n",
"| selfe | =1 if self employed |\n",
"| sleep | mins sleep at night, per wk |\n",
"| slpnaps | minutes sleep, inc. naps |\n",
"| south | =1 if live in south |\n",
"| spsepay | spousal wage income |\n",
"| spwrk75 | =1 if spouse works |\n",
"| totwrk | mins worked per week |\n",
"| union | =1 if belong to union |\n",
"| worknrm | mins work main job |\n",
"| workscnd | mins work second job |\n",
"| exper | age - educ - 6 |\n",
"| yngkid | =1 if children < 3 present |\n",
"| yrsmarr | years married |\n",
"| hrwage | hourly wage |\n",
"| agesq | age^2 |\n",
"+----------+--------------------------------+\n",
"\n",
"J.E. Biddle and D.S. Hamermesh (1990), “Sleep and the Allocation of\n",
"Time,” Journal of Political Economy 98, 922-943. Professor Biddle\n",
"kindly provided the data.\n"
]
}
],
"source": [
"dataWoo(\"sleep75\", description=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}