* [web-page extraction residue, not part of the do-file: "Vous êtes sur la page 1 sur 6"]

/* Start from an empty dataset. -capture- keeps the script going when no log
file is open from a previous run. */
clear
capture log close
log using cluster.log, replace

/* Build a six-observation toy dataset. The seed is set before the random
draw so that y is reproducible. */
set obs 6
set seed 41615
generate y = rnormal(100, 5)

/* Cluster id: observations 1-2 -> 1, 3-4 -> 2, 5-6 -> 3. */
generate c = cond(_n < 3, 1, cond(_n < 5, 2, cond(_n < 7, 3, .)))

/* Dummy regressor: 1 for observations 5 and 6 (cluster 3), 0 otherwise. */
generate d1 = inrange(_n, 5, 6)

label variable y "fake y"
label variable c "cluster"
label variable d1 "dummy that is zeros in all clusters except 1 "

/* Background reading: -help robust- (Remarks and examples), -help regress-
(Methods and formulas) and -help j_robustsingular-. */
regress y d1
**************************************
/* Generate the SE of B1 by hand assuming an i.i.d. error structure */
******************************************
/* Under i.i.d. errors the variance of B1 is sigma^2/[SSTx(1-R2j)], or in
matrix form the matching diagonal element of sigma^2 (X'X)^-1, so the SE of
B1 is sigma/sqrt(SSTx(1-R2j)). With a single regressor this is
sigma/sqrt(SSTx). See Wooldridge, Introductory Econometrics. A single
estimate of sigma squared is used for every observation: the error
covariance matrix has sigma^2 on the diagonal and zeros off the diagonal. */

/* Residual sum of squares, residual degrees of freedom, and their ratio
(the estimate of sigma squared, i.e. the mean squared error). */
display e(rss)
display e(df_r)
display e(rss)/e(df_r)

/* sigma is the square root of the mean squared error. It is stored as a
variable (same value in every observation) and should equal e(rmse). */
generate double sigma = (e(rss)/e(df_r))^.5
display sigma
display e(rmse)

/* SSTx is calculated using three different approaches. */

/* Approach 1: sum of squared deviations of d1 from its mean. */
summarize d1
generate double mean = r(mean)
generate double diff = d1 - mean
generate double diff_sq = diff^2
egen double sum_diff_sq = total(diff_sq)
/* This is SSTx. */
display sum_diff_sq
/* SE of B1, approach 1: sigma / sqrt(SSTx). */
display sigma/(sum_diff_sq^.5)

/* Approach 2: the mse1 option sets the mean squared error to 1, so e(V) is
(X'X)^-1 (Stata documentation, [P] _robust, Example 1, pg. 446 Stata 14).
Its [1,1] element is 1/SSTx: 1/(4/3) = 3/4 = .75, which reconciles with the
SSTx of 4/3 found in approach 1. */
quietly regress y d1, mse1
matrix list e(V)
matrix V = e(V)
display 1/V[1,1]
/* SE of B1, approach 2, written two equivalent ways. */
display sigma/((1/V[1,1])^.5)
display sigma*((V[1,1])^.5)
matrix drop V

/* Approach 3: form (X'X)^-1 directly, with X = [d1, constant]. -mkmat-
creates one column vector per variable, named after the variable. */
generate cons = 1
mkmat d1 cons
matrix x = d1, cons
matrix xprimexinv = inv(x'*x)
matrix list xprimexinv
/* SE of B1, approach 3. */
display sigma*(xprimexinv[1,1]^.5)
******************************************************************************
/* Calculating the heteroskedasticity-robust estimate of the SE for B1 */
**************************************************************************
/* Next drop the i.i.d. assumption and allow the variance for each
observation to be conditional upon the values of the regressors for that
observation. Below is a manual implementation of the Huber-White sandwich
estimator, which is
    [N/(N-K)] (X'X)^-1 (Sum_i ui^2 xi'xi) (X'X)^-1
See Cameron and Trivedi, Microeconometrics, and Baum, An Introduction to
Modern Econometrics Using Stata. An individual estimate of sigma squared is
obtained for each observation. Off-diagonal elements are zero.

This section uses the matrix xprimexinv = (X'X)^-1 built in the i.i.d.
section. Matrix and scalar names below are chosen so that they do not
coincide with the dataset variables y, c and d1. */
reg y d1
/* Finite-sample adjustment N/(N-K) taken from the stored results of the
regression just run (6/4 for this dataset). */
scalar hc1_adj = e(N)/e(df_r)
predict double uhat, resid
gen double uhatsq = uhat^2

/* Create a row of regressors (d1, constant) for each observation */
matrix xrow1 = (0,1)
matrix xrow2 = (0,1)
matrix xrow3 = (0,1)
matrix xrow4 = (0,1)
matrix xrow5 = (1,1)
matrix xrow6 = (1,1)

/* Pick up the squared residual of each observation. Explicit subscripting
is used because -scalar- does not document an -in- qualifier. */
scalar usq1 = uhatsq[1]
scalar usq2 = uhatsq[2]
scalar usq3 = uhatsq[3]
scalar usq4 = uhatsq[4]
scalar usq5 = uhatsq[5]
scalar usq6 = uhatsq[6]

/* Calculate each individual component ui^2 * xi'xi of the "sandwich" */
matrix one   = usq1*xrow1'*xrow1
matrix two   = usq2*xrow2'*xrow2
matrix three = usq3*xrow3'*xrow3
matrix four  = usq4*xrow4'*xrow4
matrix five  = usq5*xrow5'*xrow5
matrix six   = usq6*xrow6'*xrow6

/* Create the total sandwich filling from each of the component matrices */
matrix sand = one + two + three + four + five + six
matrix list sand

/* Manual robust VCE: [N/(N-K)] (X'X)^-1 * sand * (X'X)^-1 */
matrix hand = hc1_adj*xprimexinv*sand*xprimexinv
/* Display the manual calculation of the robust VCE */
matrix list hand

reg y d1, robust
/* Display the automatically generated robust VCE */
matrix list e(V)
matrix V = e(V)
/* Robust SE of B1: Stata's value first, then the by-hand value. The two
are expected to agree. */
disp sqrt(V[1,1])
disp sqrt(hand[1,1])
***************************************************************
/* Calculate one-way clustered SE's */
***********************************************************
/* Next drop the i.i.d. assumption and allow for residuals to be both
heteroskedastic and correlated within groups (clusters).
See Stata's documentation [P] _robust, Example 2; Cameron and Trivedi,
Microeconometrics; Baum, An Introduction to Modern Econometrics Using Stata.
By-hand formulas below use Cameron and Trivedi pg. 834:
    dfc * (Sum_g Xg'Xg)^-1 (Sum_g Xg' ug ug' Xg) (Sum_g Xg'Xg)^-1
Individual estimates of sigma squared for each observation are obtained and
correlation within clusters is allowed. */

/* Manually create the regressor matrix of each cluster (two observations
per cluster; columns are d1 and the constant) */
matrix c1 = (0,1\0,1)
matrix c2 = (0,1\0,1)
matrix c3 = (1,1\1,1)
matrix c1primec1 = c1' * c1
matrix c2primec2 = c2' * c2
matrix c3primec3 = c3' * c3
matrix sum = c1primec1 + c2primec2 + c3primec3
matrix list sum
matrix invsum = inv(sum)
matrix list invsum
/* invsum is the outer part (the "bread") of the sandwich */

/* Generate the middle part of the sandwich */
reg y d1
predict double resid, resid

/* Cluster 1: residual vector, its outer product u1*u1', and Xg'ug ug'Xg */
matrix error_1 = (resid[1]\ resid[2])
matrix list error_1
matrix u1 = error_1 * error_1'
matrix list u1
matrix num_1 = c1'*error_1*error_1'*c1
matrix list num_1

/* Cluster 2: same steps */
matrix error_2 = (resid[3]\resid[4])
matrix list error_2
matrix u2 = error_2 * error_2'
matrix list u2
matrix num_2 = c2'*error_2*error_2'*c2
matrix list num_2

/* Cluster 3: same steps. d1 equals 1 only in this cluster, so the fitted
value here is the cluster mean of y and the two residuals are equal in size
with opposite signs (3.189 and -3.189 in the original run); their sum within
the cluster is 0. */
matrix error_3 = (resid[5]\resid[6])
matrix list error_3
matrix u3 = error_3 * error_3'
matrix list u3
matrix num_3 = c3'*error_3*error_3'*c3
matrix list num_3
/* The num_3 matrix is obviously singular because it's all zeros */

matrix num_tot = num_1 + num_2 + num_3
matrix list num_tot
/* Test for invertibility. -capture noisily- shows any error message but
lets the script continue. */
capture noisily matrix invnum_tot = inv(num_tot)
/* The num_tot matrix is singular, which results in a missing F statistic
for the model when clustering is done. */

/* Correct for degrees of freedom: [(N-1)/(N-K)] * [G/(G-1)] with N = 6
observations, K = 2 coefficients and G = 3 clusters. These are constants,
so they are held in scalars rather than in dataset variables. */
scalar dfc_term_1 = (6-1)/(6-2)
scalar dfc_term_2 = 3/(3-1)
scalar dfc = dfc_term_1 * dfc_term_2
disp dfc

matrix cluster_hand = dfc*invsum*num_tot*invsum
matrix list cluster_hand
capture noisily matrix invcluster_hand = inv(cluster_hand)
/* The cluster_hand matrix is also singular. Next, display the SE of B1
([1,1], the d1 coefficient) and of B0 ([2,2], the constant), which will be
the same. */
disp cluster_hand[1,1]^.5
disp cluster_hand[2,2]^.5

reg y d1, cluster(c)
matrix list e(V)
matrix V = e(V)
capture noisily matrix invV = inv(V)
/* The e(V) matrix is expected to agree with the by-hand calculation and
to be singular as well */
**********************************************************
/* Calculate Outer Product of Gradients (OPG) Variance Estimate by Hand.*/
*****************************************************************
/* Per the original author, this by-hand calculation agrees with the
examples given in Maximum Likelihood Estimation with Stata, 4th ed., Gould,
Pitblado, Poi (see pages 54, 99, 101, 260), and the output below agrees with
the program contained on page 260. */
clear
sysuse auto
reg mpg weight displacement
predict double uhat, resid

/* Calculate sigma by hand: square root of the sum of squared deviations of
the residuals from their mean, divided by N (not N-K). All intermediate
variables are stored as double so that no precision is lost before sigma is
formed. */
summ uhat
gen double umean = r(mean)
gen double sst = (uhat-umean)^2
egen double sst_tot = total(sst)
disp sst_tot
gen double sigma = (sst_tot/_N)^.5
summ sigma
gen double sigmasq = sigma^2
summ sigmasq

/* Generate the observation-level derivatives (scores) that make up the
gradient: one column each for the constant, weight, displacement, and
sigma. */
gen double cons = uhat/sigmasq
gen double weig = (weight*uhat)/sigmasq
gen double disp = (displacement*uhat)/sigmasq
gen double sigm = (uhat/sigma) * (uhat/sigma) -1

/* Turn each score variable into a column vector of the same name, then
join the vectors side by side into one N x 4 matrix. */
mkmat cons weig disp sigm
matrix full = cons, weig, disp, sigm

/* Obtain variance estimates: the inverse of the outer product of the
scores */
matrix test = inv(full'*full)
matrix list test

/* Calculate standard errors of the constant, each regressor, and sigma. */
disp test[1,1]^.5
disp test[2,2]^.5
disp test[3,3]^.5
disp test[4,4]^.5

/* Close the log opened at the top of the script. */
log close

* [web-page extraction residue, not part of the do-file: "Vous aimerez peut-être aussi"]