Causal Inference - Differences-in-Differences
import pandas as pd
import numpy as np
import random
import statsmodels.api as sm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools
import copy
random.seed(10)
img_dir = '/Users/ida/Google Drive/Big Data Econometrics/Slides/images/'
def fn_generate_cov(dim,corr):
    acc = []
    for i in range(dim):
        row = np.ones((1,dim)) * corr
        row[0][i] = 1
        acc.append(row)
    return np.concatenate(acc,axis=0)
def fn_generate_multnorm(nobs,corr,nvar):
    mu = np.zeros(nvar)
    std = (np.abs(np.random.normal(loc = 1, scale = .5,size = (nvar,1))))**(1/2)
    # generate random normal distribution
    acc = []
    for i in range(nvar):
        acc.append(np.reshape(np.random.normal(mu[i],std[i],nobs),(nobs,-1)))
    normvars = np.concatenate(acc,axis=1)
    cov = fn_generate_cov(nvar,corr)
    C = np.linalg.cholesky(cov)
    Y = np.transpose(np.dot(C,np.transpose(normvars)))
    # return (Y,np.round(np.corrcoef(Y,rowvar=False),2))
    return Y
def fn_randomize_treatment(N,p=0.5):
    treated = random.sample(range(N), round(N*p))
    return np.array([(1 if i in treated else 0) for i in range(N)]).reshape([N,1])
def fn_bias_rmse_size(theta0,thetahat,se_thetahat,cval = 1.96):
    b = thetahat - theta0
    bias = np.mean(b)
    rmse = np.sqrt(np.mean(b**2))
    tval = b/se_thetahat
    size = np.mean(1*(np.abs(tval)>cval))
    # note size calculated at true parameter value
    return (bias,rmse,size)
def fn_plot_with_ci(n_values,tauhats,tau,lb,ub,caption):
    fig = plt.figure(figsize = (10,6))
    plt.plot(n_values,tauhats,label = '$\hat{\\tau}$')
    plt.xlabel('N')
    plt.ylabel('$\hat{\\tau}$')
    plt.axhline(y=tau, color='r', linestyle='-',linewidth=1,
                label='True $\\tau$={}'.format(tau))
    plt.title('{}'.format(caption))
    plt.fill_between(n_values, lb, ub,
                     alpha=0.5, edgecolor='#FF9848', facecolor='#FF9848',label = '95% CI')
    plt.legend()
def fn_group_to_ind(vecG,n_g):
    """
    Transform group effects vector of length G into an n-length individual effects vector
    """
    # g = len(vecG)
    # return np.concatenate([np.concatenate([vecG[g] for i in range(n_g)]) for g in range(G)]).\
    #     reshape([n,1])
    return np.array(list(itertools.chain.from_iterable(itertools.repeat(x,n_g) for x in vecG)))
def fn_ind_to_panel(vec,T):
    """
    Transform (n x 1) vector of individual specific effects into an (n x T) matrix
    """
    return np.concatenate([vec for i in range(T)],axis =1)
def fn_mat_wide_to_long(mat,n,T):
    """
    Take (n x T) matrix and output (n*T x 1) vector with observations for each t stacked on top
    """
    return np.concatenate([mat[:,i] for i in range(T)]).reshape([n*T,1])
def fn_create_wide_to_long_df(data,colnames,n,G,T):
    """
    Take list of matrices in wide format and output a dataframe in long format
    """
    n_g = n/G # number of observations in each group
    if n_g.is_integer()==False:
        print('Error: n_g is not an integer')
    else:
        n_g = int(n_g)
        group = np.concatenate([fn_group_to_ind(np.array(range(G)).reshape([G,1]),n_g) for i in range(T)])
        if len(data)!=len(colnames):
            print('Error: number of column names not equal to number of variables')
        dataDict = {}
        for i in range(len(colnames)):
            dataDict[colnames[i]] = fn_mat_wide_to_long(data[i],n,T)[:,0]
        dataDict['group'] = group[:,0]+1
        dataDict['n'] = 1+np.concatenate([range(n) for i in range(T)])
        dataDict['t'] = 1+np.array(list(itertools.chain.from_iterable(itertools.repeat(i,n) for i in range(T))))
        return pd.DataFrame(dataDict)
def fn_plot_dd(dfg,treat_start,fig_name=False):
    """
    Plot average outcome Y by group
    """
    Yg = dfg[['Y','I','group','t']].groupby(['group','t']).mean().reset_index()
    treatStatus = dict(zip(Yg[Yg.t==Yg.t.max()]['group'],Yg[Yg.t==Yg.t.max()]['I']))
    fig = plt.figure(figsize = (10,6))
    for g in Yg.group.unique():
        plt.plot(Yg[Yg.group==g]['t'],Yg[Yg.group==g]['Y'],label = 'treatment={}'.format(int(treatStatus[g])))
    plt.axvline(x=treat_start+1,color = 'red')
    plt.xlabel('time period')
    plt.ylabel('outcome')
    plt.legend()
    if fig_name:
        plt.savefig(img_dir + 'dd1.png')
def fn_within_transformation(dfg,varlist,group_var):
    """
    Transform each variable in the varlist using the within transformation to eliminate
    the group fixed effects
    """
    dfm = dfg[varlist+[group_var]].groupby([group_var]).mean().\
        reset_index().\
        rename(columns = {k:'{}_bar'.format(k) for k in varlist})
    dfg = dfg.merge(dfm, on = ['group'],how = 'left')
    dfg['const'] = 1
    for v in varlist:
        dfg['{}_w'.format(v)] = dfg[v] - dfg['{}_bar'.format(v)]
    return dfg
Statsmodels sandwich variance estimators https://github.com/statsmodels/statsmodels/blob/master/statsmodels/stats/sandwich_covariance.py
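For reference, a minimal sketch of how these sandwich estimators are requested through statsmodels' fit method; y, X, and groups here are placeholders, not objects defined in this notebook:
# heteroskedasticity-robust (HC1) standard errors
res_hc1 = sm.OLS(y,X).fit(cov_type='HC1')
# cluster-robust standard errors, clustering on a group identifier
res_cl = sm.OLS(y,X).fit(cov_type='cluster',cov_kwds={'groups': groups})
res_hc1.HC1_se, res_cl.bse # robust and cluster-robust standard errors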
1. Generate data
$Y_{it} = W_{it}\tau_{it}+\mu_i+\delta_t+\varepsilon_{it}$
$\varepsilon_{it}^{\left( r\right) }/\sigma_{i}\sim IID\left[\chi^{2}(2)-2\right] /2$
$\sigma_{i}^{2}\sim IID\left[ \chi^{2}(2)/4+0.5\right]$
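As a quick link between these formulas and the code, a minimal sketch of drawing $\sigma_i$ and $\varepsilon_{it}$ (the same construction is used in the cells below):
n, T = 1000, 2
sig = (np.random.chisquare(2,[n,1])/4+0.5)**(1/2) # sigma_i = sqrt(chi2(2)/4 + 0.5)
eps = sig*((np.random.chisquare(2,[n,T])-2)*0.5) # eps_it = sigma_i*(chi2(2)-2)/2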
# def fn_randomize_treatment(N,p=0.5):
N = 11
p = .5
treated = random.sample(range(N), round(N*p))
#
treated
[9, 0, 6, 7, 4, 10]
l1 = list(range(10))
l1
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
%%timeit
# add 2 to every element
l2 = []
for el in range(len(l1)):
l2 = l2 + [el+2]
l2
1.23 µs ± 90.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%%timeit
[(i+2) for i in l1]
525 ns ± 5.34 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
np.array([(1 if i in treated else 0) for i in range(N)]).reshape([N,1])
array([[1],
[0],
[0],
[0],
[1],
[0],
[1],
[1],
[0],
[1],
[1]])
n = 1000
T = 2
t_treat = 2 # first treatment period
p = 0.5
tau = 2
mu = np.random.normal(1,1,[n,1]) # (n x 1)
sig = (np.random.chisquare(2,[n,1])/4+0.5)**(1/2)
eps = sig*((np.random.chisquare(2,[n,T])-2)*0.5) # (n x T)
# eps = np.random.normal([n,T])
delta = np.ones([n,1])*np.random.normal(0,1,[1,T]) # (n x T)
treat = fn_randomize_treatment(n,p) # (n x 1)
W = np.concatenate([np.zeros([n,t_treat-1]),treat*np.ones([n,T-t_treat+1])],axis = 1)
Y = W*tau+mu+delta+eps
Y.shape
(1000, 2)
# constant, treatment dummy, time period = 2 dummy, interaction of those 2 dummies
vecY = np.concatenate([Y[:,0],Y[:,1]]).reshape(2000,1)
vecW = np.concatenate([W[:,0],W[:,1]]).reshape(2000,1)
vecT = np.concatenate([np.zeros([n,1]),np.ones([n,1])])
vecInt = vecW*vecT
vecConst = np.ones([2000,1])
xvars = np.hstack([vecConst,vecW,vecT,vecInt])
xvars.shape
(2000, 4)
mod = sm.OLS(vecY,xvars)
res = mod.fit()
print(res.summary())
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.196
Model: OLS Adj. R-squared: 0.195
Method: Least Squares F-statistic: 242.9
Date: Wed, 17 Feb 2021 Prob (F-statistic): 3.68e-95
Time: 16:53:15 Log-Likelihood: -3549.5
No. Observations: 2000 AIC: 7105.
Df Residuals: 1997 BIC: 7122.
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.6274 0.045 13.889 0.000 0.539 0.716
x1 0.9360 0.045 20.723 0.000 0.847 1.025
x2 -0.4562 0.078 -5.831 0.000 -0.610 -0.303
x3 0.9360 0.045 20.723 0.000 0.847 1.025
==============================================================================
Omnibus: 287.827 Durbin-Watson: 1.943
Prob(Omnibus): 0.000 Jarque-Bera (JB): 671.668
Skew: 0.819 Prob(JB): 1.41e-146
Kurtosis: 5.319 Cond. No. 9.13e+15
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.7e-29. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
The design matrix is in fact singular: because $W_{i0}=0$ for everyone, the stacked treatment column vecW is identical to the interaction vecInt, which is why x1 and x3 get the same coefficient. A valid 2x2 specification that uses the treatment-group dummy instead is estimated below.
Yt = Y[np.where(treat==1)[0],:] # select rows where the corresponding row in treat = 1
Yc = Y[np.where(treat==0)[0],:]
Yt.shape, Yc.shape
((500, 2), (500, 2))
Yt
array([[2.07832488, 1.68320036],
       [3.45006643, 1.64195153],
       [3.85554136, 2.00972155],
       [3.49435427, 1.76502504],
       [2.33965988, 0.87416435]])
plt.hist(eps)
plt.title('Distribution of errors')
Text(0.5, 1.0, 'Distribution of errors')
Estimate the parameter of interest by running a regression of $\Delta Y_{i1} = Y_{i1}-Y_{i0}$ on the treatment indicator and an intercept
Ydiff = Y[:,1]-Y[:,0]
Wmod = copy.deepcopy(W)
Wmod[:,0] = 1
model = sm.OLS(Ydiff,Wmod)
res = model.fit()
res.summary()
Dep. Variable: | y | R-squared: | 0.328 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.327 |
Method: | Least Squares | F-statistic: | 487.3 |
Date: | Mon, 08 Feb 2021 | Prob (F-statistic): | 2.96e-88 |
Time: | 13:40:23 | Log-Likelihood: | -1780.2 |
No. Observations: | 1000 | AIC: | 3564. |
Df Residuals: | 998 | BIC: | 3574. |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -0.5229 | 0.064 | -8.140 | 0.000 | -0.649 | -0.397 |
x1 | 2.0057 | 0.091 | 22.075 | 0.000 | 1.827 | 2.184 |
Omnibus: | 138.012 | Durbin-Watson: | 1.955 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 959.762 |
Skew: | 0.403 | Prob(JB): | 3.89e-209 |
Kurtosis: | 7.731 | Cond. No. | 2.62 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
$Y_{i1} - Y_{i0} = \tau W_{i1} + (\delta_1 - \delta_0) + (\varepsilon_{i1} - \varepsilon_{i0})$, so differencing eliminates the individual effect $\mu_i$.
Yt_diff = np.mean(Yt[:,1])-np.mean(Yt[:,0]) # eliminates mu_i
Yc_diff = np.mean(Yc[:,1])-np.mean(Yc[:,0]) # eliminates mu_i
Yt_diff - Yc_diff # eliminates the delta_t
2.0056559437520955
v_Y = np.concatenate([Y[:,i] for i in range(T)]).reshape([2*n,1])
# v_W = np.concatenate([W[:,i] for i in range(T)]).reshape([2*n,1])
v_W = np.concatenate([W[:,1] for i in range(T)]).reshape([2*n,1])
v_mu = np.concatenate([mu for i in range(T)])
v_n = np.concatenate([range(1,n+1) for i in range(T)]).reshape([2*n,1])
v_t = np.concatenate([i*np.ones([n,1]) for i in range(1,T+1)])
df = pd.DataFrame({'y':v_Y[:,0],
'W':v_W[:,0],
'mu':v_mu[:,0],
'n':v_n[:,0],
't':v_t[:,0]})
Now let $T_t = 1$ if $t=2$ and $T_t=0$ otherwise and run the regression
$Y_{it} = \alpha+\beta_1 W_{i1}+\beta_2 T_t+\tau\,(W_{i1}\times T_t)+\varepsilon_{it}$
where $W_{i1}$ is the treatment-group indicator; the coefficient $\tau$ on the interaction is the DD estimate.
df['Ti'] = 1*(df.t==2)
df['const'] = 1
df['int'] = df.W*df.Ti
mod = sm.OLS(df.y,df[['const','W','Ti','int']])
res = mod.fit()
res.summary()
Dep. Variable: | y | R-squared: | 0.212 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.211 |
Method: | Least Squares | F-statistic: | 179.4 |
Date: | Mon, 08 Feb 2021 | Prob (F-statistic): | 5.26e-103 |
Time: | 13:40:24 | Log-Likelihood: | -3550.4 |
No. Observations: | 2000 | AIC: | 7109. |
Df Residuals: | 1996 | BIC: | 7131. |
Df Model: | 3 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 0.6379 | 0.064 | 9.978 | 0.000 | 0.512 | 0.763 |
W | -0.0210 | 0.090 | -0.232 | 0.816 | -0.198 | 0.156 |
Ti | -0.5229 | 0.090 | -5.785 | 0.000 | -0.700 | -0.346 |
int | 2.0057 | 0.128 | 15.687 | 0.000 | 1.755 | 2.256 |
Omnibus: | 295.177 | Durbin-Watson: | 1.945 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 702.857 |
Skew: | 0.831 | Prob(JB): | 2.38e-153 |
Kurtosis: | 5.381 | Cond. No. | 6.85 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Let’s focus on the more interesting case where we observe multiple time periods and multiple groups
$Y_{igt} = \alpha_g+\gamma_t+\beta I_{gt}+\delta X_{igt}+\varepsilon_{igt}$
Assessing DD identification: the key identifying assumption in DD models is that the treated states/groups (g) would have followed trends similar to the control states in the absence of treatment (parallel trends).
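One simple way to probe this, sketched below under the assumption that the long data frame dfg built further down (with columns Y, I, group, t) and treat_start are available: keep only pre-treatment periods and test whether the treated groups have a different time trend than the control groups.
# pre-trend check: in pre-treatment periods, regress Y on a common trend,
# a treated-group dummy and a treated-group-specific trend; under parallel
# trends the interaction should be indistinguishable from zero
pre = dfg[dfg.t<treat_start+1].copy()
treated_groups = dfg.loc[dfg.I==1,'group'].unique()
pre['treated'] = 1*pre.group.isin(treated_groups)
pre['const'] = 1
pre['trend_x_treated'] = pre.t*pre.treated
res_pre = sm.OLS(pre.Y,pre[['const','t','treated','trend_x_treated']]).fit(cov_type='HC1')
print(res_pre.summary())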
def fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend = False,corr = False):
    n_g = n/G # number of observations in each group
    if n_g.is_integer()==False:
        return print('Error: n_g is not an integer')
    else:
        n_g = int(n_g)
        alphaG = np.random.normal(1,1,[G,1]) # (G x 1) group fixed effects
        alpha_i = fn_group_to_ind(alphaG,n_g) # (n x 1)
        treatG = np.zeros([G,1])
        treatG[:n_treated,] = 1
        treat_i = fn_group_to_ind(treatG,n_g) # (n x 1)
        # convert to (n x T)
        if linear_trend==True:
            gamma = np.vstack([np.array(range(T)) for i in range(n)])
        else:
            gamma = np.ones([n,1])*np.random.normal(0,1,[1,T]) # (n x T) time specific effects
        alpha = fn_ind_to_panel(alpha_i,T)
        treat = fn_ind_to_panel(treat_i,T)
        treat = np.concatenate([0*treat[:,:treat_start],treat[:,treat_start:]],axis = 1)
        sig = (np.random.chisquare(2,[n,1])/4+0.5)**(1/2)
        eps = sig*((np.random.chisquare(2,[n,T])-2)*0.5) # (n x T)
        if corr == True:
            rho = .7
            u = np.zeros(eps.shape)
            u[:,0] = eps[:,0]
            for t in range(1,T):
                u[:,t] = rho*u[:,t-1]+eps[:,t]
        X = np.concatenate([fn_generate_multnorm(n,.5,1) for i in range(T)],axis =1) # (n x T)
        if corr == False:
            Y = alpha + gamma + beta*treat + delta*X+eps
        else:
            Y = alpha + gamma + beta*treat + delta*X + u
        return [Y,alpha,gamma,treat,X]
# walk through the body of fn_generate_grouped_panel step by step
# def fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend = False,corr = False):
n = 10
G = 2
T = 5
n_treated = 1
treat_start = 4
beta = 1
delta = .5
linear_trend = True
corr = False
n_g = n/G # number of observations in each group
if n_g.is_integer()==False:
    print('Error: n_g is not an integer')
n_g = int(n_g)
alphaG = np.random.normal(1,1,[G,1]) # (G x 1) group fixed effects
alpha_i = fn_group_to_ind(alphaG,n_g) # (n x 1)
treatG = np.zeros([G,1])
treatG[:n_treated,] = 1
treat_i = fn_group_to_ind(treatG,n_g) # (n x 1)
treat_i,treatG
(array([[1.],
[1.],
[1.],
[1.],
[1.],
[0.],
[0.],
[0.],
[0.],
[0.]]),
array([[1.],
[0.]]))
gamma = np.ones([n,1])*np.random.normal(0,1,[1,T])
gamma
array([[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079],
[ 0.53272734, 0.51828703, -0.48633918, -1.74128078, -2.98146079]])
Serially correlated errors:
$u_{t} = \rho u_{t-1}+e_{t}$
# convert to (n x T)
if linear_trend==True:
    gamma = np.vstack([np.array(range(T)) for i in range(n)])
else:
    gamma = np.ones([n,1])*np.random.normal(0,1,[1,T]) # (n x T) time specific effects
alpha = fn_ind_to_panel(alpha_i,T)
treat = fn_ind_to_panel(treat_i,T)
treat = np.concatenate([0*treat[:,:treat_start],treat[:,treat_start:]],axis = 1)
sig = (np.random.chisquare(2,[n,1])/4+0.5)**(1/2)
eps = sig*((np.random.chisquare(2,[n,T])-2)*0.5) # (n x T)
if corr == True:
    rho = .7
    u = np.zeros(eps.shape)
    u[:,0] = eps[:,0]
    for t in range(1,T):
        u[:,t] = rho*u[:,t-1]+eps[:,t]
X = np.concatenate([fn_generate_multnorm(n,.5,1) for i in range(T)],axis =1) # (n x T)
alpha.shape, gamma.shape, treat.shape
((10, 5), (10, 5), (10, 5))
treat
array([[0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
if corr == False:
    Y = alpha + gamma + beta*treat + delta*X+eps
else:
    Y = alpha + gamma + beta*treat + delta*X + u
return [Y,alpha,gamma,treat,X] # copied from the function body, so this return sits outside a function and triggers the SyntaxError below
File "<ipython-input-207-64a684190739>", line 6
return [Y,alpha,gamma,treat,X]
^
SyntaxError: 'return' outside function
$Y_{igt} = \alpha_g+\gamma_t+\beta I_{gt}+\delta X_{igt}+\varepsilon_{igt}$
Within transformation to get rid of $\alpha_g$: subtract the group mean from each variable,
$Y_{igt} - \bar{Y}_{g}$, where $\bar{Y}_{g}$ averages over all $i$ in group $g$ and all $t$.
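Applied to the model above, with $\bar{Y}_{g} = \frac{1}{n_g T}\sum_{i\in g}\sum_{t} Y_{igt}$ (and analogous group means for the other variables), the group fixed effect drops out while the time effects remain, which is why the regressions below still include a full set of time dummies:
$Y_{igt}-\bar{Y}_{g} = (\gamma_t-\bar{\gamma}) + \beta\,(I_{gt}-\bar{I}_{g}) + \delta\,(X_{igt}-\bar{X}_{g}) + (\varepsilon_{igt}-\bar{\varepsilon}_{g})$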
n = 2 # 2 units, one in each group
G = 2
n_treated = 1
T = 20
beta = 20
delta = 1
treat_start = 10 # when treatment starts
Plot the outcome for the treatment and control group
linear_trend = True
[Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend)
colnames = ['Y','alpha','gamma','I','X']
data = [Y,alpha,gamma,treat,X]
dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
fn_plot_dd(dfg,treat_start)
linear_trend = False
[Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend)
colnames = ['Y','alpha','gamma','I','X']
data = [Y,alpha,gamma,treat,X]
dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
fn_plot_dd(dfg,treat_start)
Estimation
data = [Y,alpha,gamma,treat,X]
colnames = ['Y','alpha','gamma','I','X']
dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
dfg = fn_within_transformation(dfg,colnames,'group')
dfg.head()
Y | alpha | gamma | I | X | group | n | t | Y_bar | alpha_bar | gamma_bar | I_bar | X_bar | const | Y_w | alpha_w | gamma_w | I_w | X_w | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.319496 | -0.895547 | -0.546378 | 0.0 | 1.377812 | 1 | 1 | 1 | 8.615999 | -0.895547 | -0.162203 | 0.5 | -0.15752 | 1 | -8.296503 | -2.220446e-16 | -0.384175 | -0.5 | 1.535332 |
1 | -2.919256 | -0.286708 | -0.546378 | 0.0 | -1.078144 | 2 | 2 | 1 | -0.720491 | -0.286708 | -0.162203 | 0.0 | -0.04039 | 1 | -2.198765 | -1.110223e-16 | -0.384175 | 0.0 | -1.037754 |
2 | -2.500770 | -0.895547 | -0.513985 | 0.0 | 0.332036 | 1 | 1 | 2 | 8.615999 | -0.895547 | -0.162203 | 0.5 | -0.15752 | 1 | -11.116769 | -2.220446e-16 | -0.351782 | -0.5 | 0.489555 |
3 | -0.600016 | -0.286708 | -0.513985 | 0.0 | 1.036672 | 2 | 2 | 2 | -0.720491 | -0.286708 | -0.162203 | 0.0 | -0.04039 | 1 | 0.120474 | -1.110223e-16 | -0.351782 | 0.0 | 1.077062 |
4 | -0.856377 | -0.895547 | 1.264813 | 0.0 | -0.665249 | 1 | 1 | 3 | 8.615999 | -0.895547 | -0.162203 | 0.5 | -0.15752 | 1 | -9.472376 | -2.220446e-16 | 1.427016 | -0.5 | -0.507730 |
Estimation using a full set of time and group dummies
# pd.get_dummies(dfg.group)
xvars = pd.concat([dfg[['I','X']],
pd.get_dummies(dfg['group'],drop_first = False),
pd.get_dummies(dfg['t'],drop_first = False)],axis = 1)
mod = sm.OLS(dfg['Y'],xvars)
res = mod.fit()
res.summary()
Dep. Variable: | Y | R-squared: | 0.992 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.981 |
Method: | Least Squares | F-statistic: | 93.89 |
Date: | Mon, 08 Feb 2021 | Prob (F-statistic): | 1.06e-13 |
Time: | 13:40:33 | Log-Likelihood: | -45.914 |
No. Observations: | 40 | AIC: | 137.8 |
Df Residuals: | 17 | BIC: | 176.7 |
Df Model: | 22 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
I | 20.4001 | 0.767 | 26.582 | 0.000 | 18.781 | 22.019 |
X | 1.2694 | 0.277 | 4.576 | 0.000 | 0.684 | 1.855 |
1 | -1.2907 | 0.436 | -2.959 | 0.009 | -2.211 | -0.370 |
2 | -0.5759 | 0.251 | -2.296 | 0.035 | -1.105 | -0.047 |
1 | -0.5568 | 0.824 | -0.676 | 0.508 | -2.295 | 1.182 |
2 | -1.4858 | 0.841 | -1.767 | 0.095 | -3.260 | 0.288 |
3 | 1.1783 | 0.834 | 1.412 | 0.176 | -0.582 | 2.939 |
4 | -0.1733 | 0.920 | -0.189 | 0.853 | -2.113 | 1.767 |
5 | -1.3141 | 0.846 | -1.554 | 0.139 | -3.099 | 0.471 |
6 | 1.8082 | 0.845 | 2.141 | 0.047 | 0.026 | 3.590 |
7 | 1.6278 | 0.825 | 1.972 | 0.065 | -0.113 | 3.369 |
8 | -0.1100 | 0.824 | -0.134 | 0.895 | -1.849 | 1.629 |
9 | -0.6017 | 0.824 | -0.730 | 0.475 | -2.339 | 1.136 |
10 | 0.5768 | 0.824 | 0.700 | 0.493 | -1.162 | 2.315 |
11 | 0.4581 | 0.836 | 0.548 | 0.591 | -1.305 | 2.221 |
12 | 0.2653 | 0.839 | 0.316 | 0.756 | -1.505 | 2.035 |
13 | -0.1689 | 0.835 | -0.202 | 0.842 | -1.930 | 1.592 |
14 | 1.6129 | 0.835 | 1.932 | 0.070 | -0.148 | 3.374 |
15 | -0.8069 | 0.848 | -0.951 | 0.355 | -2.596 | 0.983 |
16 | 1.4182 | 0.864 | 1.641 | 0.119 | -0.405 | 3.242 |
17 | -0.6037 | 0.831 | -0.726 | 0.478 | -2.358 | 1.150 |
18 | -2.1873 | 0.832 | -2.628 | 0.018 | -3.943 | -0.431 |
19 | -1.4659 | 0.968 | -1.514 | 0.148 | -3.509 | 0.577 |
20 | -1.3379 | 0.840 | -1.593 | 0.130 | -3.109 | 0.434 |
Omnibus: | 4.798 | Durbin-Watson: | 2.930 |
---|---|---|---|
Prob(Omnibus): | 0.091 | Jarque-Bera (JB): | 5.432 |
Skew: | 0.000 | Prob(JB): | 0.0661 |
Kurtosis: | 4.805 | Cond. No. | 2.05e+16 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 9.41e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
Estimation using a within group transformation
_w denotes variables that come out of the within transformation
xvars = pd.concat([dfg[['I_w','X_w']],
pd.get_dummies(dfg['t'])],axis = 1)
mod = sm.OLS(dfg['Y_w'],xvars)
res = mod.fit()
res.summary()
Dep. Variable: | Y_w | R-squared: | 0.988 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.975 |
Method: | Least Squares | F-statistic: | 72.02 |
Date: | Mon, 08 Feb 2021 | Prob (F-statistic): | 2.62e-13 |
Time: | 13:40:34 | Log-Likelihood: | -45.914 |
No. Observations: | 40 | AIC: | 135.8 |
Df Residuals: | 18 | BIC: | 173.0 |
Df Model: | 21 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
I_w | 20.4001 | 0.746 | 27.352 | 0.000 | 18.833 | 21.967 |
X_w | 1.2694 | 0.270 | 4.709 | 0.000 | 0.703 | 1.836 |
1 | -0.4634 | 0.824 | -0.563 | 0.581 | -2.194 | 1.267 |
2 | -1.3925 | 0.839 | -1.659 | 0.114 | -3.156 | 0.371 |
3 | 1.2716 | 0.834 | 1.525 | 0.145 | -0.481 | 3.024 |
4 | -0.0800 | 0.915 | -0.087 | 0.931 | -2.003 | 1.843 |
5 | -1.2208 | 0.844 | -1.446 | 0.165 | -2.994 | 0.553 |
6 | 1.9015 | 0.844 | 2.253 | 0.037 | 0.129 | 3.674 |
7 | 1.7212 | 0.825 | 2.086 | 0.051 | -0.012 | 3.455 |
8 | -0.0167 | 0.824 | -0.020 | 0.984 | -1.748 | 1.715 |
9 | -0.5083 | 0.824 | -0.617 | 0.545 | -2.239 | 1.222 |
10 | 0.6701 | 0.824 | 0.813 | 0.427 | -1.061 | 2.401 |
11 | 0.5514 | 0.828 | 0.666 | 0.514 | -1.187 | 2.290 |
12 | 0.3586 | 0.831 | 0.431 | 0.671 | -1.388 | 2.105 |
13 | -0.0755 | 0.827 | -0.091 | 0.928 | -1.813 | 1.662 |
14 | 1.7062 | 0.827 | 2.063 | 0.054 | -0.031 | 3.444 |
15 | -0.7136 | 0.839 | -0.850 | 0.406 | -2.477 | 1.050 |
16 | 1.5116 | 0.856 | 1.766 | 0.094 | -0.286 | 3.310 |
17 | -0.5103 | 0.824 | -0.619 | 0.543 | -2.241 | 1.220 |
18 | -2.0940 | 0.825 | -2.540 | 0.021 | -3.826 | -0.362 |
19 | -1.3726 | 0.954 | -1.439 | 0.167 | -3.376 | 0.631 |
20 | -1.2445 | 0.831 | -1.497 | 0.152 | -2.991 | 0.502 |
Omnibus: | 4.798 | Durbin-Watson: | 2.930 |
---|---|---|---|
Prob(Omnibus): | 0.091 | Jarque-Bera (JB): | 5.432 |
Skew: | 0.000 | Prob(JB): | 0.0661 |
Kurtosis: | 4.805 | Cond. No. | 7.06 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Multiple groups and time periods
$Y_{igt} = \alpha_g+\gamma_t+\beta I_{gt}+\delta X_{igt}+\varepsilon_{igt}$
n = 1000
G = 10
n_treated = 5
T = 20
beta = 20
delta = 1
treat_start = 10
linear_trend = True
[Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend)
colnames = ['Y','alpha','gamma','I','X']
data = [Y,alpha,gamma,treat,X]
dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
fn_plot_dd(dfg,treat_start)
# dfg.head()
Estimation
dfw = fn_within_transformation(dfg,colnames,'group')
xvars = pd.concat([dfw[['I_w','X_w']],
pd.get_dummies(dfw['t'])],axis = 1)
# note: cov_type='HC1' gives heteroskedasticity-robust (not cluster-robust) standard errors;
# for cluster-robust inference use res = mod.fit(cov_type='cluster', cov_kwds={'groups': dfw['group']})
mod = sm.OLS(dfw['Y_w'],xvars)
res = mod.fit(cov_type='HC1')
t = res.params.I_w/res.HC1_se.I_w
res.summary()
Dep. Variable: | Y_w | R-squared: | 0.993 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.993 |
Method: | Least Squares | F-statistic: | nan |
Date: | Mon, 08 Feb 2021 | Prob (F-statistic): | nan |
Time: | 13:40:36 | Log-Likelihood: | -28192. |
No. Observations: | 20000 | AIC: | 5.643e+04 |
Df Residuals: | 19978 | BIC: | 5.660e+04 |
Df Model: | 21 | ||
Covariance Type: | HC1 |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
I_w | 19.9867 | 0.028 | 712.870 | 0.000 | 19.932 | 20.042 |
X_w | 0.9981 | 0.007 | 147.009 | 0.000 | 0.985 | 1.011 |
1 | -9.5285 | 0.032 | -294.481 | 0.000 | -9.592 | -9.465 |
2 | -8.5477 | 0.031 | -278.041 | 0.000 | -8.608 | -8.487 |
3 | -7.4749 | 0.032 | -233.314 | 0.000 | -7.538 | -7.412 |
4 | -6.5301 | 0.031 | -213.574 | 0.000 | -6.590 | -6.470 |
5 | -5.5166 | 0.031 | -178.651 | 0.000 | -5.577 | -5.456 |
6 | -4.4325 | 0.034 | -130.609 | 0.000 | -4.499 | -4.366 |
7 | -3.5591 | 0.030 | -116.873 | 0.000 | -3.619 | -3.499 |
8 | -2.5156 | 0.032 | -77.789 | 0.000 | -2.579 | -2.452 |
9 | -1.4233 | 0.034 | -41.344 | 0.000 | -1.491 | -1.356 |
10 | -0.4743 | 0.032 | -14.866 | 0.000 | -0.537 | -0.412 |
11 | 0.4865 | 0.032 | 15.107 | 0.000 | 0.423 | 0.550 |
12 | 1.5096 | 0.033 | 46.048 | 0.000 | 1.445 | 1.574 |
13 | 2.4724 | 0.031 | 78.889 | 0.000 | 2.411 | 2.534 |
14 | 3.5285 | 0.031 | 112.952 | 0.000 | 3.467 | 3.590 |
15 | 4.4621 | 0.030 | 148.465 | 0.000 | 4.403 | 4.521 |
16 | 5.5001 | 0.032 | 170.856 | 0.000 | 5.437 | 5.563 |
17 | 6.5112 | 0.032 | 204.902 | 0.000 | 6.449 | 6.574 |
18 | 7.5147 | 0.035 | 215.853 | 0.000 | 7.446 | 7.583 |
19 | 8.4975 | 0.034 | 246.447 | 0.000 | 8.430 | 8.565 |
20 | 9.5200 | 0.031 | 308.540 | 0.000 | 9.460 | 9.580 |
Omnibus: | 9425.670 | Durbin-Watson: | 2.007 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 68273.339 |
Skew: | 2.146 | Prob(JB): | 0.00 |
Kurtosis: | 10.969 | Cond. No. | 7.40 |
Notes:
[1] Standard Errors are heteroscedasticity robust (HC1)
Simulate placebo policy
def fn_placebo_treatment(row,tstart,treated):
    if (row.t>=tstart) & (row.group in treated):
        return 1
    else:
        return 0
THIS IS THE PART WE DIDN’T COVER IN LECTURE 4
The code below simulates a placebo policy: we mistakenly treat some units as if they had been assigned a treatment even though they were not, and we estimate the treatment effect. Because the true treatment effect is zero, we expect to reject the null hypothesis of a zero effect only around 5% of the time (the nominal size of the test). Indeed, we reject the null about 1% of the time below; with more replications the rejection rate should converge towards 5%.
R = 100
l = []
n = 1000
G = 10
n_treated = 0
T = 20
beta = 20
delta = 1
treat_start = 10
linear_trend = True
colnames = ['Y','alpha','gamma','I','X']
for r in tqdm(range(R)):
    np.random.seed(r)
    [Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend)
    data = [Y,alpha,gamma,treat,X]
    dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
    tstart = random.choice(range(dfg.t.min(),dfg.t.max()+1)) # placebo treatment start date
    treated = random.choices(range(dfg.group.min(),dfg.group.max()+1),k = int(dfg.group.max()/2)) # placebo treated groups
    dfg['I'] = dfg.apply(lambda row: fn_placebo_treatment(row,tstart,treated),axis = 1)
    dfw = fn_within_transformation(dfg,colnames,'group')
    xvars = pd.concat([dfw[['I_w','X_w']],
                       pd.get_dummies(dfw['t'])],axis = 1)
    mod = sm.OLS(dfw['Y_w'],xvars)
    res = mod.fit(cov_type='HC1')
    t = res.params.I_w/res.HC1_se.I_w
    if np.abs(t)>1.96:
        l = l+[1]
    else:
        l = l+[0]
6%|▌ | 6/100 [00:02<00:35, 2.61it/s]<ipython-input-223-02feb1c08ec8>:27: RuntimeWarning: invalid value encountered in double_scalars
t = res.params.I_w/res.HC1_se.I_w
100%|██████████| 100/100 [00:32<00:00, 3.11it/s]
np.mean(l) # we reject the null of a zero treatment effect 1% of the time
0.01
Now let’s add some serial correlation to the DGP
n = 1000
G = 10
n_treated = 5
T = 20
beta = 20
delta = 1
treat_start = 10
linear_trend = True
serial_corr = True
[Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend,serial_corr)
colnames = ['Y','alpha','gamma','I','X']
data = [Y,alpha,gamma,treat,X]
dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
fn_plot_dd(dfg,treat_start)
Consider an outcome $y_{it}$ with a common shock at time $t$ equal to $\gamma_t$. Estimate
$y_{it} = \alpha + \beta\gamma_t+\varepsilon_{it}$
and obtain the residuals $\hat{\varepsilon}_{it}$.
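A minimal sketch of this residual check, assuming the long data frame dfg generated from the serially correlated DGP above (the residuals also contain the group effects and the treatment term, so this is only a rough diagnostic for the AR(1) coefficient $\rho$):
# regress y on a constant and the time shock gamma_t, take residuals,
# then compute the within-unit first-order autocorrelation of the residuals
xg = dfg[['gamma']].copy()
xg['const'] = 1
dfr = dfg[['n','t']].copy()
dfr['e'] = sm.OLS(dfg['Y'],xg[['const','gamma']]).fit().resid.values
dfr = dfr.sort_values(['n','t'])
dfr['e_lag'] = dfr.groupby('n')['e'].shift(1)
print(dfr[['e','e_lag']].corr().iloc[0,1])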
def fn_run_mc(beta,R,corr):
    l0 = []
    l1 = []
    for r in tqdm(range(R)):
        np.random.seed(r)
        [Y,alpha,gamma,treat,X]= fn_generate_grouped_panel(n,G,T,n_treated,treat_start,beta,delta,linear_trend,corr)
        data = [Y,alpha,gamma,treat,X]
        dfg = fn_create_wide_to_long_df(data,colnames,n,G,T)
        tstart = random.choice(range(dfg.t.min(),dfg.t.max()+1))
        treated = random.choices(range(dfg.group.min(),dfg.group.max()+1),k = int(dfg.group.max()/2))
        # dfg['I'] = dfg.apply(lambda row: fn_placebo_treatment(row,tstart,treated),axis = 1)
        dfw = fn_within_transformation(dfg,colnames,'group')
        xvars = pd.concat([dfw[['I_w','X_w']],
                           pd.get_dummies(dfw['t'])],axis = 1)
        mod = sm.OLS(dfw['Y_w'],xvars)
        res = mod.fit(cov_type='HC1')
        t0 = res.params.I_w/res.HC1_se.I_w # test H0: beta=0
        t1 = (res.params.I_w-beta)/res.HC1_se.I_w # test H0: beta = beta_true
        l0 = l0 + [1*(np.abs(t0)>1.96)]
        l1 = l1 + [1*(np.abs(t1)>1.96)]
    return np.mean(l0),np.mean(l1)
t_stat = .2
t_stat>1.96, 1*(t_stat>1.96)
(False, 0)
R = 500
n = 1000
G = 10
n_treated = 5
T = 20
delta = 1
treat_start = 10
linear_trend = True
colnames = ['Y','alpha','gamma','I','X']
Treatment effect = 2 with no serial correlation
beta = 2
corr = False
h1, hb = fn_run_mc(beta,R,corr)
h1,hb
100%|██████████| 500/500 [00:38<00:00, 13.04it/s]
(1.0, 0.066)
Treatment effect = 2 with serial correlation
beta = 2
corr = True
h1, hb = fn_run_mc(beta,R,corr)
h1,hb
100%|██████████| 500/500 [00:25<00:00, 19.81it/s]
(1.0, 0.34)
Treatment effect = 0, no serial correlation
beta = 0
corr = False
h1, hb = fn_run_mc(beta,R,corr)
print('We reject beta=0 {} of the time'.format(h1))
100%|██████████| 500/500 [00:27<00:00, 18.02it/s]
We reject beta=0 0.066 of the time
Treatment effect = 0, serial correlation
beta = 0
corr = True
h1, hb = fn_run_mc(beta,R,corr)
print('We reject beta=0 {} of the time'.format(h1))
100%|██████████| 500/500 [00:37<00:00, 13.47it/s]
We reject beta=0 0.34 of the time
Sensitivity tests
Redo the analysis on pre-event years: the estimated treatment effect should be zero. Are treatment and control groups similar along observable dimensions? Make sure the change is concentrated around the event. Make sure that other outcome variables that should be unaffected by the event are indeed unaffected.
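As an example of the first check, a sketch of re-running the DD estimation on pre-event periods only, with a placebo treatment date halfway through them (this reuses dfg, colnames, treat_start, and the helper functions defined above; under parallel trends the coefficient on I_w should be close to zero):
df_pre = dfg[dfg.t<=treat_start].copy()
treated_groups = dfg.loc[dfg.I==1,'group'].unique()
placebo_start = treat_start//2 # hypothetical event date inside the pre-period
df_pre['I'] = 1*((df_pre.t>placebo_start) & (df_pre.group.isin(treated_groups)))
dfw_pre = fn_within_transformation(df_pre,colnames,'group')
xvars_pre = pd.concat([dfw_pre[['I_w','X_w']],pd.get_dummies(dfw_pre['t'])],axis = 1)
res_pre = sm.OLS(dfw_pre['Y_w'],xvars_pre).fit(cov_type='HC1')
print(res_pre.params['I_w'],res_pre.HC1_se['I_w'])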