A Data Scientist's blog: Structural Equation Modeling in R for the personality project

Structural Equation Modeling in R

Structural equation models combine measurement models (e.g., reliability) with structural models (e.g., regression). The sem package, developed by John Fox, allows for some basic structural equation models. To use it, add the sem package by using the package manager.
Structural Equation Modeling may be thought of as regression corrected for attentuation. The sem package developed by John Fox uses the RAM path notation of Jack McCardle and is fairly straightforward. The examples in the package are quite straightforward. A text book, such as John Loehlin's Latent Variable Models (4th Edition) is helpful in understanding the algorithm.
For much more detail on using R to do structural equation modeling, see the course notes for sem (primarily using R) available at the syllabus for my sem course. Also see John Fox's notes that he has prepared as a brief description of SEM techniques as an appendix to his statistics text.

#Loehlin problem 2.5 
 obs.var2.5 = c('Ach1',  'Ach2',  'Amb1',  'Amb2',  'Amb3')
 R.prob2.5 = matrix(c(
  1.00 ,  .60  , .30,  .20,   .20,                                               
   .60,  1.00,   .20,   .30,   .10,                                                
   .30,   .20,  1.00,   .70,   .60 ,                                               
   .20,   .30,   .70,  1.00,   .50,                                                
   .20,   .10,   .60,  .50,  1.00), ncol=5,byrow=TRUE)    
  
  
  
  #correlated factors structure (ambition <-> Achievement) 
  model2.5=matrix(c(
        'Ambit ->  Amb1',      'a', NA,
        'Ambit -> Amb2' ,      'b', NA,
        'Ambit -> Amb3' ,      'c', NA,
        'Achieve -> Ach1',     'd', NA,
        'Achieve -> Ach2',     'e', NA,
        'Ambit <-> Achieve',   'f', NA,
        'Amb1 <-> Amb1' ,      'u', NA,
        'Amb2 <-> Amb2' ,      'v', NA,
        'Amb3 <-> Amb3' ,      'w', NA,
        'Ach1 <-> Ach1' ,      'x', NA,
        'Ach2 <-> Ach2' ,      'y', NA,
        'Achieve <-> Achieve',  NA, 1,
        'Ambit <-> Ambit',      NA, 1),
        ncol=3, byrow=TRUE)
 sem2.5= sem(model2.5,R.prob2.5,60, obs.var2.5)
  summary(sem2.5,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -2.04e-06  4.85e-03  3.87e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- 0.0955="" 0.0965="" 0.1138="" 0.1509="" 0.1762="" 0.356="" 0.652="" 0.683="" 0.761="" 0.879="" 1.45e-11="" 1.55e-15="" 1.76e-03="" 3.127="" 4.525="" 4.986="" 6.03e-06="" 6.16e-07="" 6.753="" 7.974="" ach1="" ach2="" achieve="" amb2="" amb3="" ambit="" b="" c="" d="" e="" f=""> Ambit
u    0.153    0.0982   1.557 1.20e-01     Amb1 <--> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.905 3.67e-03     Ach2 <--> Ach2

 Iterations =  26 
>  #causal structure with errors in achievement
>   #ambition -> achievement
>   
>  model2.51=matrix(c(
+       'Ambit ->  Amb1',      'a', NA,
+       'Ambit -> Amb2' ,      'b', NA,
+       'Ambit -> Amb3' ,      'c', NA,
+       'Achieve -> Ach1',     'd', NA,
+       'Achieve -> Ach2',     'e', NA,
+       'Ambit -> Achieve',   'f', NA,
+       'Amb1 <-> Amb1' ,      'u', NA,
+       'Amb2 <-> Amb2' ,      'v', NA,
+       'Amb3 <-> Amb3' ,      'w', NA,
+       'Ach1 <-> Ach1' ,      'x', NA,
+       'Ach2 <-> Ach2' ,      'y', NA,
+       'Achieve <-> Achieve',  NA, 1,
+       'Ambit <-> Ambit',      NA, 1),
+       ncol=3, byrow=TRUE)




>    
>  sem2.51= sem(model2.51,R.prob2.5,100, obs.var2.5)
>  summary(sem2.51,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -6.33e-06  4.85e-03  3.06e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- 0.0955="" 0.0965="" 0.0982="" 0.1343="" 0.1394="" 0.153="" 0.1781="" 0.381="" 0.638="" 0.652="" 0.761="" 0.821="" 1.20e-01="" 1.45e-11="" 1.557="" 1.55e-15="" 2.01e-06="" 2.731="" 3.98e-06="" 4.613="" 4.752="" 6.32e-03="" 6.753="" 7.974="" ach1="" ach2="" achieve="" amb1="" amb2="" amb3="" ambit="" b="" c="" d="" e="" f="" u=""> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.906 3.67e-03     Ach2 <--> Ach2

 Iterations =  27 
> #causal structure with errors in achievement
>   #ambition -> achievement
>   
>  model2.52=matrix(c(
+       'Ambit ->  Amb1',      'a', NA,
+       'Ambit -> Amb2' ,      'b', NA,
+       'Ambit -> Amb3' ,      'c', NA,
+       'Achieve -> Ach1',     'd', NA,
+       'Achieve -> Ach2',     'e', NA,
+       'Ambit -> Achieve',   'f', NA,
+       'Amb1 <-> Amb1' ,      'u', NA,
+       'Amb2 <-> Amb2' ,      'v', NA,
+       'Amb3 <-> Amb3' ,      'w', NA,
+       'Ach1 <-> Ach1' ,      'x', NA,
+       'Ach2 <-> Ach2' ,      'y', NA,
+       'Achieve <-> Achieve', 'z', NA,
+       'Ambit <-> Ambit',      NA, 1),
+       ncol=3, byrow=TRUE)
>    
>  sem2.52= sem(model2.52,R.prob2.5,100, obs.var2.5)
Warning message: 
Negative parameter variances.
Model is probably underidentified.
 in: sem.default(ram = ram, S = S, N = N, param.names = pars, var.names = vars,  
>  summary(sem2.52,digits=3)

 Model Chisquare =  9.74   Df =  3 Pr(>Chisq) = 0.0209
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.82
 RMSEA index =  0.151   90 % CI: (0.0517, 0.261)
 BIC =  -8.9 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -1.62e-06  4.85e-03  3.43e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                     
a    0.920    0.0924   9.966 0.00e+00      Amb1 <--- 0.0955="" 0.0965="" 0.0982="" 0.153="" 0.346="" 0.652="" 0.703="" 0.761="" 0.905="" 1.20e-01="" 1.45e-11="" 1.557="" 1.55e-15="" 6.753="" 7.974="" ach1="" ach2="" achieve="" amb1="" amb2="" amb3="" ambit="" b="" c="" d="" e="" f="" nan="" u=""> Amb1
v    0.420    0.0898   4.679 2.88e-06       Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09       Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01       Ach1 <--> Ach1
y    0.534    0.1837   2.905 3.67e-03       Ach2 <--> Ach2
z    0.824       NaN     NaN      NaN Achieve <--> Achieve

 Iterations =  27 

 Aliased parameters: d e f z 
Warning message: 
NaNs produced in: sqrt(diag(object$cov)) 
>   #causal structure with errors in achievement
>   #ambition -> achievement
>   #fix achievement to Loehlin answer
>   
>  model2.53=matrix(c(
+       'Ambit ->  Amb1',      'a', NA,
+       'Ambit -> Amb2' ,      'b', NA,
+       'Ambit -> Amb3' ,      'c', NA,
+       'Achieve -> Ach1',     'd', NA,
+       'Achieve -> Ach2',     'e', NA,
+       'Ambit -> Achieve',   'f', NA,
+       'Amb1 <-> Amb1' ,      'u', NA,
+       'Amb2 <-> Amb2' ,      'v', NA,
+       'Amb3 <-> Amb3' ,      'w', NA,
+       'Ach1 <-> Ach1' ,      'x', NA,
+       'Ach2 <-> Ach2' ,      'y', NA,
+       'Achieve <-> Achieve', NA, .873,
+       'Ambit <-> Ambit',      NA, 1),
+       ncol=3, byrow=TRUE)
>    
>  sem2.53= sem(model2.53,R.prob2.5,100, obs.var2.5)
>  summary(sem2.53,digits=3)\
Error: syntax error
> summary(sem2.53,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -1.14e-06  4.85e-03  3.19e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- 0.0955="" 0.0965="" 0.0982="" 0.1303="" 0.1437="" 0.153="" 0.1906="" 0.356="" 0.652="" 0.683="" 0.761="" 0.879="" 1.20e-01="" 1.45e-11="" 1.557="" 1.55e-15="" 2.01e-06="" 2.731="" 3.98e-06="" 4.612="" 4.752="" 6.32e-03="" 6.753="" 7.974="" ach1="" ach2="" achieve="" amb1="" amb2="" amb3="" ambit="" b="" c="" d="" e="" f="" u=""> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.906 3.67e-03     Ach2 <--> Ach2

 Iterations =  27 
> 


ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ

 #Loehlin problem from table 2-12
 #Note that version a is a classic example of congeneric measurement.
 #Alternatively, this could be thought of as underidentified higher order model

 obs.var2.12 = c('a',  'b',  'c',  'd')
  R.prob2.12 = matrix(c(
  1.00 ,  .30,    .20,   .10,                                               
   .30,  1.00,   .20,   .20,                                               
   .20,   .20,  1.00,   .30,                                            
   .10,   .20,   .30,  1.00), 
   ncol=4,byrow=TRUE)

  model2.12a=matrix(c(
        'g ->  a',      'a1', NA,
        'g -> b' ,      'b1', NA,
        'g -> c' ,      'c1', NA,
        'g -> d',     'd1', NA,
        'a <-> a',      'e1', NA,
        'b <-> b',      'e2', NA,
        'c <-> c',      'e3', NA,
        'd <-> d',      'e4', NA,
        'g <-> g',       NA, 1),
        ncol=3, byrow=TRUE)
  sem2.12a= sem(model2.12a,R.prob2.12,120, obs.var2.12)
 summary(sem2.12a,digits=3)

#a 1 degree of freedom model
model2.12b=matrix(c(
        'g ->  a',      'a1', NA,
        'g -> b' ,      'b1', NA,
        'f -> c' ,      'c1', NA,
        'f -> d',     'd1', NA,
        'a <-> a',      'e1', NA,
        'b <-> b',      'e2', NA,
        'c <-> c',      'e3', NA,
        'd <-> d',      'e4', NA,
        'g <-> g',       NA, 1,
        'f <-> f',       NA, 1,
        'g <-> f',        'fg', NA),
        ncol=3, byrow=TRUE)
  sem2.12b= sem(model2.12b,R.prob2.12,120, obs.var2.12)
 summary(sem2.12b,digits=3)
 
#the following higher level model has 0 degrees of freedom
 model2.12c=matrix(c(
        'g ->  a',      'a1', NA,
        'g -> b' ,      'b1', NA,
        'f -> c' ,      'c1', NA,
        'f -> d',       'd1', NA,
        'a <-> a',      'e1', NA,
        'b <-> b',      'e2', NA,
        'c <-> c',      'e3', NA,
        'd <-> d',      'e4', NA,
        'g <-> g',       NA, 1,
        'f <-> f',       NA, 1,
        'h -> g',        'hg', NA,
        'h -> f',        NA,1,
        'h <-> h',        NA,1),
        ncol=3, byrow=TRUE)
  sem2.12c= sem(model2.12c,R.prob2.12,120, obs.var2.12)
 summary(sem2.12c,digits=3)
 
 
 
 ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ
 
 #Loehlin problem 2.9
 
  obs.var2.09 = c('w',  'x',  'y',  'z')
  R.prob2.09 = matrix(c(
  1.00 ,  .40,    .50,   .30,                                               
   .40,  1.00,   .55,   .35,                                               
   .50,   .55,  1.00,   .40,                                            
   .30,   .35,   .40,  1.00), 
   ncol=4,byrow=TRUE)

  model2.09=matrix(c(
        'g ->  w',      'a1', NA,
        'g -> x' ,      'b1', NA,
        'g -> y' ,      'c1', NA,
        'g -> z',     'd1', NA,
        'w <-> w',      'e1', NA,
        'x <-> x',      'e2', NA,
        'y <-> y',      'e3', NA,
        'z <-> z',      'e4', NA,
        'g <-> g',       NA, 1),
        ncol=3, byrow=TRUE)
  sem2.09= sem(model2.09,R.prob2.09,500, obs.var2.09)
 summary(sem2.09,digits=3)
 
 
  obs.var2.09b = c('w',  'x',  'y',  'z')
  
  R.prob2.09b = matrix(c(
  1.00 ,  .40,    .50,   .30,                                               
   .40,  1.00,   .55,   .35,                                               
   .50,   .55,  1.00,   .40,                                            
   .30,   .35,   .40,  1.00), 
   ncol=4,byrow=TRUE)

  model2.09b=matrix(c(
        'g ->  w',      NA,1,
        'g -> x' ,      'b1', NA,
        'g -> y' ,      'c1', NA,
        'g -> z',     'd1', NA,
        'w <-> w',      'e1', NA,
        'x <-> x',      'e2', NA,
        'y <-> y',      'e3', NA,
        'z <-> z',      'e4', NA,
        'g <-> g',       'e',NA),
        ncol=3, byrow=TRUE)
        
  sem2.09b= sem(model2.09b,R.prob2.09b,500, obs.var2.09b)
 summary(sem2.09b,digits=3)

part of a short guide to R
Version of February 15, 2007
William Revelle
Department of Psychology
Northwestern University

Useful R links

Readings and software:
Comprehensive R Archive Network (CRAN)
An introduction to R
R Studio

Structural Equation modelling:
sem
lavaan
psych for sem
EFA and factor extension (fa)

Multilevel modeling:
Multilevel
Linear and Non Linear Mixed Effects nlme
statsBy

Item Response Models:
Latent Trait Model (ltm)
mirt
mokken
irt by factor analysis (irt.fa)

A Data Scientist's blog

Search This Blog

Tuesday, November 12, 2013

Structural Equation Modeling in R for the personality project

Structural Equation Modeling in R

Useful R links

No comments:

Post a Comment