I dislike R, unlike some other software I use, such as SPSS and Stata. It’s extremely error prone. But it’s free, and it can do almost everything (e.g., a few things Stata cannot do and a lot of things SPSS/AMOS cannot do). As always, I will update this page whenever I learn something new.
Before going any further
I recommend downloading https://osf.io/hiuzk/. That’s the subset of the General Social Survey data I will be using for my analyses below. So, if you want to follow along, you should get it. And do not forget to run the entire syntax displayed in Create and modify variables.
Always remember this
R is case sensitive and does not handle missing data automatically: many functions fail or return NA unless you tell them what to do with NAs. The data files must be loaded before you can work with them. In Windows file paths, you must use \\ (or /) instead of \ or it will fail. The function read.csv() below, as you guessed, allows you to read a .csv file, but you must then save it into an object, whatever you call it. Let’s say “entiredata”. When importing a .csv file, you don’t need to use data.frame() because R automatically stores it as a data frame. If the file is too large, keep only the variables you need by using select() and then write.table() to save them in a new file.
setwd("c:\\Users\\MengHuTheGodOfDestruction\\location of your .csv file")
entiredata<-read.csv("GSSsubset.csv") # GSS data 1974-2012
#d<- select(entiredata, race, educ, wordsum)
#write.table(d, file = "GSSsubset2.csv", sep = ",", col.names = T)
Publish at RPubs or save the work environment as an HTML page
If you ever need to publish your work at RPubs, run the code in the RStudio “Source” panel within chunks to display all the results, then save the working environment as an HTML file using “Knit to HTML” in the “Knit” menu (you must install the knitr package). Make sure to keep the .Rmd file by using “Save As” in the “File” menu.
library(rsconnect)
accounts()
file.exists('C:\\Users\\mh198\\OneDrive\\Documents\\vocab_corrected_code.html')
rmarkdown::render('C:\\Users\\mh198\\OneDrive\\Documents\\vocab_corrected_code.Rmd')
rsconnect::rpubsUpload(
title = 'A new, open source English vocabulary test (corrected)',
contentFile = 'C:\\Users\\mh198\\OneDrive\\Documents\\vocab_corrected_code.html',
originalDoc = 'C:\\Users\\mh198\\OneDrive\\Documents\\vocab_corrected_code.Rmd'
)
This will give you an RPubs link where you need to proceed to step 2 before publishing.
Useful functions (to get your feet wet)
Sys.setenv(LANG = "en") # make R environment in english
help(table) # request the webpage help for table() function
View(d) # open the data window of the object "d"
head(d, n=10) # display the first ten rows of all variables
tail(d, n=10) # display the last ten rows of all variables
require(dplyr) # get select() function to keep the variables you wish
dk<- select(d, worda, wordb, wordc, wordd, worde, wordf, wordg, wordh, wordi, wordj, wordsum, bw1) # keep the variables you want (useful when you have too many variables)
dk = dk[complete.cases(dk),] # keep only cases with no missing data on all variables in dk
d$bw1<-NULL # drop variables individually
d<- subset(entiredata, age<70 & age>=18) # retain only cases having age<70 and age>=18
xtabs(~degree+sex+race, data=d, subset=wordsum<11) # acts like a "do if": all valid wordsum values are below 11, so this drops the missing-value codes
(56 + 31 + 56 + 8 + 32) / 5 # You get 36.6, but I usually do this kind of things in EXCEL or Kingsoft Spreadsheets
Create and modify variables
Please, run the entire syntax in your R console. Otherwise, you will not be able to work with the regression, structural equation, factor analysis, and item response models that I have prepared. Do not forget to check your directory path and make sure your file is converted into .csv.
setwd("c:\\Users\\MengHuTheGodOfDestruction\\location of your .csv file")
entiredata<-read.csv("GSSsubset.csv")
d<- subset(entiredata, age<70 & age>=18) # retain only cases having age<70 and age>=18
require(car) # get recode() function
d$bw<- rep(NA) # generate variable with only missing values
d$bw[d$race==1] <- 1 # assign a value of "1" under the condition specified in bracket
d$bw[d$race==2] <- 0 # assign a value of "0" under the condition specified in bracket
d$bw0<- rep(NA)
d$bw0[d$year>=2000 & d$race==1 & d$hispanic==1] <- 1
d$bw0[d$year>=2000 & d$race==2 & d$hispanic==1] <- 0
d$bw00<- rep(NA)
d$bw00[d$year<2000 & d$race==1] <- 1
d$bw00[d$year<2000 & d$race==2] <- 0
d$bw1<-d$bw0 # combine the two vars, starting with the first one
d$bw1[is.na(d$bw0)]<-d$bw00[is.na(d$bw0)] # then fill in from the second one where the first is NA
d$agec<- d$age-40.62 # mean-centering of age
d$agec2<- d$agec^2 # squared term of age
d$agec3<- d$agec^3 # cubic term of age
d$bw1agec = d$bw1*d$agec
d$bw1agec2 = d$bw1*d$agec2
d$bw1agec3 = d$bw1*d$agec3
d$weight<- d$wtssall*d$oversamp # combined sampling weight (product of wtssall and oversamp)
d$gender<- d$sex-1 # assuming sex has M=1 and F=2, gender will have M=0 and F=1
d$realinc[d$realinc==0] <- NA
d$logincome<- log(d$realinc) # log of income
d$sqrtincome<- sqrt(d$realinc) # sqrt of income
d$educ[d$educ>20] <- NA # remove missing values before transformation
d$degree[d$degree>4] <- NA # remove missing values before transformation
d$educc<- d$educ-13.09 # mean-centering of educ
d$degreec<- d$degree-1.39 # mean-centering of degree
d$sibs[d$sibs==-1] <- NA
d$sibs[d$sibs>37] <- NA
d$cohort[d$cohort==9999] <- NA
d$wordsum[d$wordsum==-1] <- NA
d$wordsum[d$wordsum==99] <- NA
d$worda[d$worda<0] <- NA
d$wordb[d$wordb<0] <- NA
d$wordc[d$wordc<0] <- NA
d$wordd[d$wordd<0] <- NA
d$worde[d$worde<0] <- NA
d$wordf[d$wordf<0] <- NA
d$wordg[d$wordg<0] <- NA
d$wordh[d$wordh<0] <- NA
d$wordi[d$wordi<0] <- NA
d$wordj[d$wordj<0] <- NA
d$worda[d$worda==9] <- 0
d$wordb[d$wordb==9] <- 0
d$wordc[d$wordc==9] <- 0
d$wordd[d$wordd==9] <- 0
d$worde[d$worde==9] <- 0
d$wordf[d$wordf==9] <- 0
d$wordg[d$wordg==9] <- 0
d$wordh[d$wordh==9] <- 0
d$wordi[d$wordi==9] <- 0
d$wordj[d$wordj==9] <- 0
d$word = d$worda + d$wordb + d$wordc + d$wordd + d$worde + d$wordf + d$wordg + d$wordh + d$wordi + d$wordj
d<- d %>% mutate(cohort6 = case_when(
cohort>=1905 & cohort<=1928~0,
cohort>=1929 & cohort<=1943~1,
cohort>=1944 & cohort<=1953~2,
cohort>=1954 & cohort<=1962~3,
cohort>=1963 & cohort<=1973~4,
cohort>=1974 & cohort<=1994~5,
cohort>1994~NA_real_))
d$cohortdummy1<- as.numeric(d$cohort>=1905 & d$cohort<=1928)
d$cohortdummy2<- as.numeric(d$cohort>=1929 & d$cohort<=1943)
d$cohortdummy3<- as.numeric(d$cohort>=1944 & d$cohort<=1953)
d$cohortdummy4<- as.numeric(d$cohort>=1954 & d$cohort<=1962)
d$cohortdummy5<- as.numeric(d$cohort>=1963 & d$cohort<=1973)
d$cohortdummy6<- as.numeric(d$cohort>=1974 & d$cohort<=1994)
d$bw1C1<- d$bw1*d$cohortdummy1
d$bw1C2<- d$bw1*d$cohortdummy2
d$bw1C3<- d$bw1*d$cohortdummy3
d$bw1C4<- d$bw1*d$cohortdummy4
d$bw1C5<- d$bw1*d$cohortdummy5
d$bw1C6<- d$bw1*d$cohortdummy6
d<- d %>% mutate(cohort21 = case_when(
cohort>=1905 & cohort<=1915~0,
cohort>=1916 & cohort<=1920~1,
cohort>=1921 & cohort<=1925~2,
cohort>=1926 & cohort<=1929~3,
cohort>=1930 & cohort<=1933~4,
cohort>=1934 & cohort<=1937~5,
cohort>=1938 & cohort<=1941~6,
cohort>=1942 & cohort<=1944~7,
cohort>=1945 & cohort<=1947~8,
cohort>=1948 & cohort<=1950~9,
cohort>=1951 & cohort<=1953~10,
cohort>=1954 & cohort<=1956~11,
cohort>=1957 & cohort<=1959~12,
cohort>=1960 & cohort<=1962~13,
cohort>=1963 & cohort<=1965~14,
cohort>=1966 & cohort<=1968~15,
cohort>=1969 & cohort<=1972~16,
cohort>=1973 & cohort<=1976~17,
cohort>=1977 & cohort<=1980~18,
cohort>=1981 & cohort<=1985~19,
cohort>=1986 & cohort<=1994~20,
cohort>1994~NA_real_))
d$age9<- NA
d<- d %>% mutate(age9 = case_when(
age>=18 & age<=23~1,
age>=24 & age<=28~2,
age>=29 & age<=33~3,
age>=34 & age<=38~4,
age>=39 & age<=44~5,
age>=45 & age<=50~6,
age>=51 & age<=56~7,
age>=57 & age<=62~8,
age>=63 & age<=69~9,
TRUE~ as.numeric(age9)))
d$aged1<- as.numeric(d$age>=18 & d$age<=23)
d$aged2<- as.numeric(d$age>=24 & d$age<=28)
d$aged3<- as.numeric(d$age>=29 & d$age<=33)
d$aged4<- as.numeric(d$age>=34 & d$age<=38)
d$aged5<- as.numeric(d$age>=39 & d$age<=44)
d$aged6<- as.numeric(d$age>=45 & d$age<=50)
d$aged7<- as.numeric(d$age>=51 & d$age<=56)
d$aged8<- as.numeric(d$age>=57 & d$age<=62)
d$aged9<- as.numeric(d$age>=63 & d$age<=69)
d$bw1aged1<- d$bw1*d$aged1
d$bw1aged2<- d$bw1*d$aged2
d$bw1aged3<- d$bw1*d$aged3
d$bw1aged4<- d$bw1*d$aged4
d$bw1aged5<- d$bw1*d$aged5
d$bw1aged6<- d$bw1*d$aged6
d$bw1aged7<- d$bw1*d$aged7
d$bw1aged8<- d$bw1*d$aged8
d$bw1aged9<- d$bw1*d$aged9
The following code will attach value labels to your categorical variables. Remember that once a variable is converted to a factor, you cannot use it anymore in arithmetic to create other variables; e.g., you may still need the numeric bw1/race codes to compute Y-hat after a regression (a small workaround is sketched after the code below).
d$bw1 <- factor(d$bw1,
levels = c(0,1),
labels = c("black", "white"))
d$race <- factor(d$race,
levels = c(1,2,3),
labels = c("white", "black", "other"))
Imputation
Let’s say you want to impute cognitive data but allow for the possibility that the test correlations differ across race groups. First, remove the cases with too few subtest data, then split the data into male whites and male blacks. You do not want background information such as ID, age, or race to enter the imputation, but you need those variables back after the imputation is done. Finally, combine the separate datasets into one to restore the original data file, with the missing values filled in with imputed values.
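The imputation code below assumes the male-only subsets mwhite and mblack already exist. A minimal sketch of how they might be created, assuming a data frame called m with a sex variable (1 = male) and a wb variable (1 = white, 0 = black); these names are placeholders, adapt them to your own file:
mwhite<- subset(m, sex==1 & wb==1) # white males only
mblack<- subset(m, sex==1 & wb==0) # black males only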
library(mice)
mwhite<- mwhite[rowSums(is.na(mwhite)) <10,] # keep only cases with fewer than 10 missing subtests
impute<- data.frame(mwhite[,1:37]) # keep only columns 1-37 (the cognitive variables) so that background variables do not enter the imputation
impute<- impute %>% mutate_all(as.numeric)
impute1<- mice(impute, method='pmm', m=5, maxit=50, seed=500)
impute1<- complete(impute1, 2) # extract the second of the five imputed datasets as a complete data frame
mblack<- mblack[rowSums(is.na(mblack)) <10,]
impute<- data.frame(mblack[,1:37])
impute<- impute %>% mutate_all(as.numeric)
impute2<- mice(impute, method='pmm', m=5, maxit=50, seed=500)
impute2<- complete(impute2, 2)
mwhite1<- cbind(impute1, wb=mwhite$wb, sex=mwhite$sex, age=mwhite$age, BY_ID_RE=mwhite$BY_ID_RE) # select background variables and put them back
mblack1<- cbind(impute2, wb=mblack$wb, sex=mblack$sex, age=mblack$age, BY_ID_RE=mblack$BY_ID_RE) # same for blacks
dm<- rbind(mwhite1, mblack1) # combine black and white data
write.table(dm, file = "TALENTimputeM.csv", sep=",", row.names = F, col.names = T)
Summary statistics
Cross-tabulation can be used with more than 2 variables.
describeBy() is particularly useful. You get N, mean, 5% trimmed mean, median, standard deviation, standard error of the mean, skewness, kurtosis, min, max, and finally “mad”, a robust estimator of the standard deviation computed by rescaling the median absolute deviation from the sample median. describe() does the same thing but without groupings.
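For instance, you can verify the “mad” column yourself: by default, R’s mad() rescales the median absolute deviation by 1.4826 so that it estimates the SD under normality.
x<- na.omit(d$wordsum)
mad(x) # same value as the line below
1.4826*median(abs(x - median(x)))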
require(dplyr)
d %>% group_by(race, sex) %>% summarise(mean=mean(age), sd=sd(age))
require(psych) # for describe() and describeBy()
word_vars <- grep("^word", names(d), value = TRUE) # names of all variables starting with "word"
describe(d[, word_vars]) # descriptives for all of them at once
describeBy(d$degree, d$race) # summary stats of degree by race, but also works with a single variable
n = table(d$wordsum, d$degree, d$bw1) # select variables' N to be cross tabulated
addmargins(n) # N by cells and N_sums by rows and columns
addmargins(prop.table(n, margin=2)) # the same table expressed as proportions (normalized within the 2nd margin) rather than counts, with sums by rows and columns
with(d, table(degree, race, sex)) # another way of doing cross-tabulations (N per cells)
xtabs(~degree+bw1+gender, data=d, na.action=na.omit) # remove NA
xtabs(~degree+bw1+gender, data=d, subset=wordsum<11) # subset
by(d$wordsum, INDICES=d$educ, FUN=mean, na.rm=TRUE) # mean of wordsum by education level
mean(d$wordsum, trim=0.05, na.rm=TRUE) # mean trimmed at 5% for checking outliers, also removes these stupid NAs
shapiro.test(d$logincome) # it won't work because N is higher than 5000
Weighted means/SDs can be obtained through a survey design for either summary statistics or regression analysis. For summary stats, there are two ways:
library(survey) # get means and SEs
svy<- svydesign(data=d, id=~BY_ID_RE, strata=NULL, weights=d$BY_WTA)
svyby(~age, by = ~sex + wb, design=svy, FUN=svymean)
var<- svyby(~age, by = ~sex + wb, design=svy, FUN=svyvar) # get variance
sqrt(var) # get the "grouped" SD
library(srvyr) # get means, SEs and SDs
d %>% as_survey_design(ids = BY_ID_RE, weights = BY_WTA) %>% group_by(wb, sex) %>% summarise(MEAN = survey_mean(age), SD = survey_sd(age))
The histogram gives the distribution of wordsum by race. The boxplot displays the distribution of wordsum (median and quartiles) by race and gender.
require(lattice) # histogram() function
histogram(~ wordsum | race, data=d)
boxplot(wordsum~gender*race, data=d, notch=TRUE, col=(c("darkolivegreen1","violet")), main="Meng Hu hates everything and loves nothing", xlab="Race", ylab="Wordsum scores") # x axis shows the values of the categorical variables gender and race
Scatterplots
library(car) # scatterplot function
library(gplots) # plotmeans() function
library(lattice) # xyplot() bwplot() densityplot() functions
library(ggplot2)
library(ggpubr)
ggplot(d, aes(x=European_ancestry, y=income))+
  geom_point()+
  stat_cor(method = "pearson") # displays cor in the plot (needs ggpubr)
plot(d$wordsum, d$educ, xlab="wordsum vocabulary score", ylab="years of school", pch=20, col="gray50", frame.plot=TRUE) # you can configure the scale of x and y by using, xlim=c(0,10), ylim=c(0,20)
xyplot(d$wordsum~d$degree|d$bw1, main="title", ylab="wordsum", xlab="degree")
bwplot(d$wordsum~d$degree|d$bw1, main="title", ylab="wordsum", xlab="degree")
scatterplot(wordsum~cohort|bw1, boxplots=FALSE, smooth=TRUE, reg.line=FALSE, data=d) # reg.line=TRUE requests a regression line
densityplot(~d$logincome|d$bw1, main="title", xlab="log of real income") # just a Kernel density plot
plotmeans(wordsum ~ year, data=d, xlab="survey year", ylab="wordsum vocabulary test", n.label=FALSE) # n.label=TRUE gives you the sample size per values in the x variable, so if you have numerous values in x, don't do it...
pairs(~ wordsum + bw1 + logincome, data=d, subset=age<70, main="Meng Hu is not smart, his IQ has only 3 digits") # scatterplot matrices
Correlations
Using cor() without specifying use= will give you NA whenever there are missing data. cor() must be used with use="complete.obs" (listwise deletion) or use="pairwise.complete.obs" (pairwise deletion) if missing values are present. cor() can also be applied to an entire data set stored in the object “d”, but be careful if you have too many variables.
rcorr() produces 3 matrices: one for the correlations, one for the sample sizes, and one for the asymptotic p-values. It handles missing values automatically, using pairwise deletion.
cor.test() gives confidence intervals (only for Pearson) and lets you specify the alternative hypothesis (against the null), which may be “two.sided”, “less” (if the predicted correlation is negative) or “greater” (if the predicted correlation is positive).
require(Hmisc) # get rcorr() function
cor(d$wordsum, d$educ, use="pairwise.complete.obs", method="pearson") # could also use "spearman" or "kendall" but Kendall takes a damn lot of time...
rcorr(d$wordsum, d$educ, type="pearson")
cor.test(d$wordsum, d$educ, alternative="two.sided", method="pearson", conf.level=0.95)
cor.test(d$wordsum, d$educ, alternative="less", method="pearson", conf.level=0.95)
Cronbach’s alpha
require(psych) # load alpha() function, by far the best...
require(psy) # load cronbach() function
require(ltm) # load cronbach.alpha() function
require(dplyr) # get select() function
dk = select(d, worda, wordb, wordc, wordd, worde, wordf, wordg, wordh, wordi, wordj, wordsum, bw1)
dk = dk[complete.cases(dk),] # keep only cases with no missing data on all variables in dk
View(dk) # look carefully at the variables' columns
dk0 = dk # copy dk into dk0 object
dk1 = dk # copy dk into dk1 object
dk1$bw1[dk1$bw1==0] <- NA # remove blacks from dk1
dk0$bw1[dk0$bw1==1] <- NA # remove whites from dk0
dk0 = dk0[complete.cases(dk0),] # complete data for blacks "0"
dk1 = dk1[complete.cases(dk1),] # complete data for whites "1"
cronbach(dk[,1:10]) # items submitted to cronbach's alpha are columns 1-10
cronbach.alpha(dk[,1:10]) # items submitted to cronbach's alpha are columns 1-10
alpha(dk[,1:10]) # items submitted to cronbach's alpha are columns 1-10
alpha(dk0[,1:10]) # repeat analysis for blacks only
alpha(dk1[,1:10]) # repeat analysis for whites only
Multiple regression
Two formulas are possible: lm(d$y ~ d$x1 + d$x2) or lm(y ~ x1 + x2, data=d). You can use sampling weights, and you can analyze a subset of the data, e.g., males only, by using subset=gender<1. The inconvenience is that fitted() will then return values only for the cases actually used, so they will probably not line up with the rows of your data.
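For illustration only (this small model is mine, not one of the models below), a weighted regression restricted to males:
m_males<- lm(wordsum ~ educ + logincome, data=d, subset=gender<1, weights=weight) # males only (gender==0)
summary(m_males)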
require(car) # get VIF() function
require(dplyr) # get select() function
require(lattice) # xyplot() function
require(lsr) # get standardCoefs() function
dm0 = select(d, wordsum, bw1, cohortdummy2, cohortdummy3, cohortdummy4, cohortdummy5, cohortdummy6, bw1C2, bw1C3, bw1C4, bw1C5, bw1C6, cohort6, gender, age9, weight)
dm1 = select(d, wordsum, bw1, cohortdummy2, cohortdummy3, cohortdummy4, cohortdummy5, cohortdummy6, bw1C2, bw1C3, bw1C4, bw1C5, bw1C6, aged1, aged2, aged3, aged4, aged6, aged7, aged8, aged9, cohort6, gender, age9, weight)
dm0 = dm0[complete.cases(dm0),] # complete data for model m0, allows you to get an actual variable for Yhat by using fitted()
dm1 = dm1[complete.cases(dm1),] # complete data for model m1, allows you to get an actual variable for Yhat by using fitted()
m0<- lm(wordsum ~ bw1 + cohortdummy2 + cohortdummy3 + cohortdummy4 + cohortdummy5 + cohortdummy6 + bw1C2 + bw1C3 + bw1C4 + bw1C5 + bw1C6, data=dm0, weights=dm0$weight)
summary(m0)
m1<- lm(wordsum ~ bw1 + cohortdummy2 + cohortdummy3 + cohortdummy4 + cohortdummy5 + cohortdummy6 + bw1C2 + bw1C3 + bw1C4 + bw1C5 + bw1C6 + aged1 + aged2 + aged3 + aged4 + aged6 + aged7 + aged8 + aged9, data=dm1, weights=dm1$weight)
summary(m1)
standardCoefs(m1) # unstandardized and standardized betas
confint(m1, level=.99) # confidence intervals of unstandardized coefficients at 0.99 CI level
plot(m1) # fitted vs resid, Q-Q, scale-location, resid vs leverage (push ENTER if you want to see the graphs)
plot(m1, which=1) # residuals vs fitted
plot(m1, which=2) # normal QQ (standardized residuals vs theoretical quantiles)
plot(m1, which=3) # scale-location (√standardized residuals vs fitted values)
plot(m1, which=4) # Cook's distance vs observations' number
plot(m1, which=5) # standardized residuals vs leverage
dm1$res1<-residuals(m1) # save the residuals; residuals() is best used on complete cases so the rows line up with dm1
dm1$stdres1<-rstandard(m1) # standardized residuals
dm1$pred1<-fitted(m1) # save predicted Y; fitted() is best used on complete cases so the rows line up with dm1
qqnorm(dm1$res1, ylab="residualized values") # data points with no fitted line
qqline(dm1$res1) # add the reference line to the previous plot
residualPlots(m1) # plots of Pearson residuals against each independent variable
vif(m1) # VIF needs "car" package
anova(m0, m1)
xyplot(dm1$pred1 ~ dm1$cohort6 | dm1$age9, data=dm1, groups=bw1, pch=19, type=c("p"), col = c('red', 'blue'), grid=TRUE, ylab="Wordsum predicted by OLS regression", xlab="cohort", key=list(text=list(c("Black", "White")), points=list(pch=c(19,19), col=c("red", "blue")), columns=2)) # multi-panel plot of Yhat against cohort6 for each category of age9
ANOVA can give you the significance of the difference in model fit between our two regressions, but remember that significance tests are affected by sample size. I would much rather use the Cohen’s f² measure of effect size suggested by Selya et al. (2012).
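A minimal sketch of Cohen’s f² for the block of age dummies, using f² = (R²AB − R²A) / (1 − R²AB) from Selya et al. (2012). Note that m0 and m1 were fitted on slightly different cases (dm0 vs dm1); refit both on the same cases if you want an exact figure.
r2_a<- summary(m0)$r.squared # R2 of the model without the age dummies
r2_ab<- summary(m1)$r.squared # R2 of the model with the age dummies
(r2_ab - r2_a)/(1 - r2_ab) # Cohen's f2 for the added age dummies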
Errors-in-Variables (EIV) regression
This is a linear regression that corrects individually for measurement error in the independent variables (although I recommend SEM whenever it’s possible). There is no R package available, but Culpepper & Aguinis (2011) provide a handmade program. The results are identical to those obtained from the eivreg function in Stata. The intercept, R² and confidence intervals seem to be missing, however.
eiv<-function(formula,reliability,data){
mfx<-model.matrix(formula,data=data)
p<-length(mfx[1,])-1;n<-length(mfx[,1])
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mf<-data.frame(mf)
MXX<-var(mfx[,c(2:(p+1))]);MXY<-var(mfx[,c(2:(p+1))],mf[,1])
Suu<-matrix(0,p,p)
if(p==1) Suu=(1-reliability)*MXX else
if(p>1) diag(Suu)<-(1-reliability)*diag(MXX)
Mxx<-MXX-(1-p/n)*Suu;Btilde<-solve(Mxx)%*%MXY
vY=var(mf[,1])
MSEtilde<-as.numeric(n*(vY-2*t(Btilde)%*%MXY+t(Btilde)%*%MXX%*%Btilde)/(n-p-1))
Rhat<-matrix(0,p,p);diag(Rhat)<-(t(Btilde)%*%Suu)^2
VCtilde<-MSEtilde*(1/n)*solve(Mxx)+(1/n)*solve(Mxx)%*%(Suu*MSEtilde+Suu%*%Btilde%*%t(Btilde)%*%Suu+2*Rhat)%*%solve(Mxx)
ttilde<-Btilde/sqrt(diag(VCtilde))
output<-cbind(reliability,Btilde,sqrt(diag(VCtilde)),ttilde,2*(1-pt(abs(ttilde),n-p)))
colnames(output)<-c('Reliability','Est.','S.E.','t','Prob.(>|t|)')
output
}
eiv(educ~wordsum+logincome+age,reliability=c(.71,.75,1),data=d)
Relative weight (regression) analysis
This is a technique claimed to be able to disentangle the tangle of correlations between the independent variables. According to its proponents, relative importance analysis can tell which independent variable is the strongest predictor, something classical regression cannot do.
library(yhat) # use yhat package for dominance/commonality regression analysis
Meng_Hu_is_a_mean_boy_dont_look_at_him<-lm(wordsum~educ+degree+logincome+age,data=d)
dammit<-regr(Meng_Hu_is_a_mean_boy_dont_look_at_him)
dammit$Beta_Weights
dammit$Structure_Coefficients
dammit$Commonality_Data
Tobit regression
I know of two functions: censReg() from the censReg package and vglm() from the VGAM package. censReg displays numbers not available in VGAM, so it’s complementary; however, it does not (yet) allow sampling weights. Tobit requires you to specify the value at which the dependent variable is right-censored (the upper limit). Here, wordsum has values 0-10, so I use 10 as the upper limit.
library(censReg) # censReg() function
library(VGAM) # vglm() function
library(lattice) # xyplot() function
still_hate_R<- censReg(wordsum ~ bw1 + agec + agec2 + agec3 + bw1agec + bw1agec2 + bw1agec3, right=10, data=d)
summary(still_hate_R)
So_complicated<- vglm(wordsum ~ bw1 + agec + agec2 + agec3 + bw1agec + bw1agec2 + bw1agec3, tobit(Upper=10), data=d, weights=d$weight)
summary(So_complicated)
b <- coef(So_complicated)
se <- sqrt(diag(vcov(So_complicated)))
cbind(LL = b - qnorm(0.975) * se, UL = b + qnorm(0.975) * se) # CIs
d$yhat <- fitted(So_complicated)[,1]
d$wordsumpredictedage<- 5.210807+1.355356*d$bw1+-.0051079*d$agec+-.0016632*d$agec2+.000017*d$agec3+.0169752*d$bw1agec+.0002868*d$bw1agec2+.0000104*d$bw1agec3
d$wordsumpredictedage<- d$wordsumpredictedage + 0*(d$wordsum + d$bw1 + d$age) # set the prediction to NA wherever wordsum, bw1 or age is missing, without changing the predicted values
xyplot(d$wordsumpredictedage ~ d$age, data=d, groups=bw1, pch=19, type=c("p"), col = c('red', 'blue'), grid=TRUE, ylab="Wordsum predicted by tobit", xlab="age", key=list(text=list(c("Black", "White")), points=list(pch=c(19,19), col=c("red", "blue")), columns=2))
Don’t try to extract residuals, even though you can. Stata knows this is problematic and will tell you the procedure is incorrect, but R won’t let you know. In tobit, Y is not treated as an observed variable but as a latent one, so the residuals can’t be extracted the usual way. You must use the method elaborated by Cameron & Trivedi (2009, pp. 535-538).
Logistic regression
LR evaluates how the probability of scoring 1 (versus 0) on a binary Y variable varies across the values of the group variables or continuous variables of interest. Usually, people use socio-economic variables. But one can also perform a Differential Item Functioning (DIF) test with LR (Oliveri et al., 2012). The idea is that an unbiased item must produce identical response curves for all groups once the item response is conditioned on the total test score, group membership, and the interaction of group membership with total score.
library(lattice) # xyplot() function
require(dplyr) # get select() function
dk<- select(d, worda, wordb, wordc, wordd, worde, wordf, wordg, wordh, wordi, wordj, wordsum, bw1)
dk = dk[complete.cases(dk),] # keep only cases with no missing data on all variables in dk
dk$bw1wordsum<- dk$bw1*dk$wordsum
Meng_Hu_controls_the_Illuminati<- glm(wordg ~ wordsum + bw1 + bw1wordsum, data=dk, family=binomial("logit")) # request logit
summary(Meng_Hu_controls_the_Illuminati)
dk$fittedlogit1 = fitted(Meng_Hu_controls_the_Illuminati)
anova(Meng_Hu_controls_the_Illuminati, test="Chisq") # gives p-values and Deviance and changes in Deviance between nested models for variables added sequentially
drop1(Meng_Hu_controls_the_Illuminati, test="Chisq") # gives p-values and Deviance and changes in Deviance between models with and without a given variable
dk$logit1 = -4.12497 + 0.47041*dk$wordsum + -2.95061*dk$bw1 + 0.49316*dk$bw1wordsum
dk$odds1 = exp(dk$logit1)
dk$probability1 = dk$odds1/(1+dk$odds1)
xyplot(dk$probability1~ dk$wordsum, data=dk, groups=bw1, pch=19, type=c("p"), col = c('red', 'blue'), grid=TRUE, ylab="probability of correct answer in word g", xlab="wordsum total score", key=list(text=list(c("Black", "White")), points=list(pch=c(19,19), col=c("red", "blue")), columns=2))
xyplot(dk$fittedlogit1~ dk$wordsum, data=dk, groups=bw1, pch=19, type=c("p"), col = c('red', 'blue'), grid=TRUE, ylab="probability of correct answer in word g", xlab="wordsum total score", key=list(text=list(c("Black", "White")), points=list(pch=c(19,19), col=c("red", "blue")), columns=2))
anova() shows whether the model fit changes significantly when variables are sequentially added, and drop1() shows whether the model fit changes significantly when a given variable is dropped from the full model (i.e., with all x variables). Both functions could have been used with F, LRT, Rao, or Cp instead of Chisq. See help(anova).
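For example, the same calls with other test statistics (for a glm, "LRT" is a synonym of "Chisq", and "Rao" gives the score test):
anova(Meng_Hu_controls_the_Illuminati, test="LRT")
drop1(Meng_Hu_controls_the_Illuminati, test="Rao")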
Multilevel (linear) regression
I use the package lme4 for LME models. REML is the default option; set REML=FALSE if you want ML estimation. For a random intercept model, you write (1 | cohort21); for a random slope model, you write (bw1 | cohort21), where bw1 is the slope. You can specify multiple random slopes. If you want to add another level, say the survey year, you write (bw1 | cohort21) + (1 | year). In this way, with age as fixed effects, you can completely isolate the variation of Y due to age, period, and cohort effects: the APC model of Yang & Land (2008).
The package MuMIn is also needed if you want to compute the R²GLMM of Johnson (2014) for the random intercept and random slope models. Johnson himself has a self-made syntax (see below) which produces similar results to MuMIn. The package arm is needed if you want the standard errors of the random coefficients.
require(lme4) # multilevel regression ('lme' does not work on my recent version of R)
require(arm) # extract standard errors of model coefficients
require(MuMIn) # r.squaredGLMM() function
require(lattice) # qqmath() function
model.ri<- lmer(wordsum ~ bw1 + aged1 + aged2 + aged3 + aged4 + aged6 + aged7 + aged8 + aged9 + bw1aged1 + bw1aged2 + bw1aged3 + bw1aged4 + bw1aged6 + bw1aged7 + bw1aged8 + bw1aged9 + (1 | cohort21) + (1 | year), data = d, REML=TRUE)
summary(model.ri) # fixed effects + -2LL stats
display(model.ri) # get AIC and DIC model fit indices
coef(model.ri) # random effect coefficients (model intercept + random residuals)
se.coef(model.ri) # their standard errors
ranef(model.ri) # random (i.e., intercept) residuals
qqmath(ranef(model.ri, condVar=TRUE)) # QQ plot of intercept residuals
coefs<- data.frame(coef(summary(model.ri)))
coefs$p.z <- 2 * (1 - pnorm(abs(coefs$t.value)))
coefs # get p-values of fixed effects
model.rs<- lmer(wordsum ~ bw1 + aged1 + aged2 + aged3 + aged4 + aged6 + aged7 + aged8 + aged9 + bw1aged1 + bw1aged2 + bw1aged3 + bw1aged4 + bw1aged6 + bw1aged7 + bw1aged8 + bw1aged9 + (bw1 | cohort21) + (1 | year), data = d, REML=TRUE)
summary(model.rs) # fixed effects + -2LL stats
display(model.rs) # get AIC and DIC model fit indices
coef(model.rs) # random effect coefficients (model intercept/slope + random residuals)
se.coef(model.rs) # their standard errors
ranef(model.rs) # random (i.e., intercept/slope) residuals
qqmath(ranef(model.rs, condVar=TRUE)) # QQ plot of intercept/slope residuals
coefs<- data.frame(coef(summary(model.rs)))
coefs$p.z <- 2 * (1 - pnorm(abs(coefs$t.value)))
coefs # get p-values of fixed effects
X <- model.matrix(model.ri)
n <- nrow(X)
Beta <- fixef(model.ri)
Sf <- var(X %*% Beta)
Sigma.list <- VarCorr(model.ri)
Sl <-
sum(
sapply(Sigma.list,
function(Sigma)
{
Z <-X[,rownames(Sigma)]
sum(rowSums((Z %*% Sigma) * Z))/n
}))
Se <- attr(Sigma.list, "sc")^2
Sd <- 0
total.var <- Sf + Sl + Se + Sd
(Rsq.m <- Sf / total.var)
(Rsq.c <- (Sf + Sl) / total.var)
rm(X, n, Beta, Sf, Sigma.list, Sl, Se, Sd, total.var, Rsq.m, Rsq.c) # remove all these stored objects
X <- model.matrix(model.rs)
n <- nrow(X)
Beta <- fixef(model.rs)
Sf <- var(X %*% Beta)
Sigma.list <- VarCorr(model.rs)
Sl <-
sum(
sapply(Sigma.list,
function(Sigma)
{
Z <-X[,rownames(Sigma)]
sum(rowSums((Z %*% Sigma) * Z))/n
}))
Se <- attr(Sigma.list, "sc")^2
Sd <- 0
total.var <- Sf + Sl + Se + Sd
(Rsq.m <- Sf / total.var)
(Rsq.c <- (Sf + Sl) / total.var)
r.squaredGLMM(model.ri)
r.squaredGLMM(model.rs)
chistat<- anova(model.ri, model.rs)[2,"Chisq"] # χ2 but model refitted with ML instead of REML
0.5 * pchisq(chistat, 1, lower.tail=FALSE) + 0.5 * pchisq(chistat, 2, lower.tail=FALSE)
require(dplyr) # get select() function
dk<- select(d, wordsum, bw1, aged1, aged2, aged3, aged4, aged6, aged7, aged8, aged9, bw1aged1, bw1aged2, bw1aged3, bw1aged4, bw1aged6, bw1aged7, bw1aged8, bw1aged9, age9, cohort21)
dk = dk[complete.cases(dk),] # keep only cases with no missing data on all variables in dk
lme1<- lmer(wordsum ~ bw1 + aged1 + aged2 + aged3 + aged4 + aged6 + aged7 + aged8 + aged9 + bw1aged1 + bw1aged2 + bw1aged3 + bw1aged4 + bw1aged6 + bw1aged7 + bw1aged8 + bw1aged9 + (bw1 | cohort21), data = dk, REML=TRUE)
dk$fittedlme1<-fitted(lme1) # fitted() works only with complete.cases
xyplot(dk$fittedlme1 ~ dk$cohort21 | dk$age9, data=dk, groups=bw1, pch=19, type=c("p"), col = c('red', 'blue'), grid=TRUE, ylab="Wordsum predicted by multilevel random slope model", xlab="cohort", key=list(text=list(c("Black", "White")), points=list(pch=c(19,19), col=c("red", "blue")), columns=2)) # multi-panel plot of Yhat against cohort21 for each category of age9
Differential Item Functioning analyses
Below, I show how to use difR. It’s tedious, because the items are selected not by their variable names but by their column positions in the data window. So let’s create a subset of our data d in the object dk. We get a dk data frame with 12 variables. Let’s look at the screenshot.
require(difR) # get difR package for summary stats of DIF items with diverse methods
require(dplyr) # get select() function
dk<- select(d, worda, wordb, wordc, wordd, worde, wordf, wordg, wordh, wordi, wordj, wordsum, bw1)
dk = dk[complete.cases(dk),] # keep only cases with no missing data on all variables in dk
View(dk)
In the above picture, it appears that worda to wordj are listed in columns 2:11 and bw1 in column 13. This is false. If you try to select column 13, R will shoot you this message: "Error in `[.data.frame`(dk, , 13) : undefined columns selected". Indeed, the row.names column does not count as a real variable column. Thus, worda:wordj are in columns 1:10, wordsum is in column 11 and bw1 in column 12, and those are the column numbers used in the calls below.
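If you would rather not count columns by hand, you can look the positions up by name; a small sketch (the item_cols and group_col object names are my own):
item_cols<- which(names(dk) %in% paste0("word", letters[1:10])) # positions of worda:wordj
group_col<- which(names(dk)=="bw1") # position of bw1
mantelHaenszel(dk[,item_cols], dk[,group_col], correct=TRUE, exact=FALSE)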
mantelHaenszel(dk[,1:10], dk[,12], correct=TRUE, exact=FALSE, anchor=c(6,8:10)) # columns 1:10 for worda:wordj, column 12 for bw1, columns 6, 8, 9, 10 (word f, h, i, j) are specified as anchor variables (i.e., DIF free)
difMH(dk[,1:10], dk[,12], focal.name=0, MHstat="MHChisq", correct=TRUE, exact=FALSE, purify=TRUE, nrIter=10) # focal group (i.e., black) is coded 0 in bw1
plot(difMH(dk[,1:10], dk[,12], focal.name=0, MHstat="MHChisq", correct=TRUE, exact=FALSE, purify=TRUE, nrIter=10)) # plot of χ2 stats for each item (do not try to save it into an object and request it after because it won't work)
difLogistic1<-difLogistic(dk[,1:10], dk[,12], focal.name=0, type="udif", criterion="LRT", alpha=0.05, purify=TRUE, nrIter=10, save.output=TRUE) # uniform DIF
difLogistic2<-difLogistic(dk[,1:10], dk[,12], focal.name=0, type="nudif", criterion="LRT", alpha=0.05, purify=FALSE, nrIter=10, save.output=TRUE) # non-uniform DIF
difLogistic3<-difLogistic(dk[,1:10], dk[,12], focal.name=0, type="both", criterion="LRT", alpha=0.05, purify=FALSE, nrIter=10, save.output=TRUE) # uniform and non-uniform DIF
difLogistic1
difLogistic2
difLogistic3
plot(difLogistic1, save.plot = TRUE, save.options = c("difLogistic1", "default", "jpeg")) # plot LRT or Wald statistics per item for "udif"
plot(difLogistic2, save.plot = TRUE, save.options = c("difLogistic2", "default", "jpeg")) # plot LRT or Wald statistics per item for "nudif"
plot(difLogistic3, save.plot = TRUE, save.options = c("difLogistic3", "default", "jpeg")) # plot LRT or Wald statistics per item for "both"
plot(difLogistic1, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordA", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordB", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordC", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordD", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordE", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordF", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordG", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordH", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordI", "default", "jpeg"))
plot(difLogistic1, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLogisticRace1wordJ", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordA", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordB", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordC", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordD", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordE", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordF", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordG", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordH", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordI", "default", "jpeg"))
plot(difLogistic2, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLogisticRace2wordJ", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordA", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordB", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordC", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordD", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordE", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordF", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordG", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordH", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordI", "default", "jpeg"))
plot(difLogistic3, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLogisticRace3wordJ", "default", "jpeg"))
Logistik1<-Logistik(dk[,1:10], dk[,12], anchor=c(6,8:10), type="udif", criterion="LRT") # uniform DIF
Logistik2<-Logistik(dk[,1:10], dk[,12], anchor=c(6,8:10), type="nudif", criterion="LRT") # non-uniform DIF
Logistik3<-Logistik(dk[,1:10], dk[,12], anchor=c(6,8:10), type="both", criterion="LRT") # uniform and non-uniform DIF
Logistik1
Logistik2
Logistik3
difRaju1<-difRaju(dk[,1:10], group=dk[,12], focal.name=0, model="1PL", purify=TRUE, signed=TRUE)
difRaju2<-difRaju(dk[,1:10], group=dk[,12], focal.name=0, model="2PL", purify=TRUE, signed=TRUE)
difRaju3<-difRaju(dk[,1:10], group=dk[,12], focal.name=0, model="3PL", c=0.05, purify=FALSE, signed=TRUE)
difRaju1
difRaju2
difRaju3
plot(difRaju1, save.plot = TRUE, save.options = c("difRaju1", "default", "jpeg")) # plot Raju's statistic per item for 1PL
plot(difRaju2, save.plot = TRUE, save.options = c("difRaju2", "default", "jpeg")) # plot Raju's statistic per item for 2PL
plot(difRaju3, save.plot = TRUE, save.options = c("difRaju3", "default", "jpeg")) # plot Raju's statistic per item for 3PL
difLord1<-difLord(dk[,1:10], group=dk[,12], focal.name=0, model="1PL", engine="ltm", discr=1, irtParam=NULL, same.scale=TRUE, alpha=0.05, purify=TRUE, nrIter=10)
difLord2<-difLord(dk[,1:10], group=dk[,12], focal.name=0, model="2PL", engine="ltm", discr=1, irtParam=NULL, same.scale=TRUE, alpha=0.05, purify=FALSE, nrIter=10)
difLord3<-difLord(dk[,1:10], group=dk[,12], focal.name=0, model="3PL", engine="ltm", discr=1, irtParam=NULL, same.scale=TRUE, alpha=0.05, c=0.05, purify=FALSE, nrIter=10)
difLord1
difLord2
difLord3
plot(difLord1, save.plot = TRUE, save.options = c("difLord1", "default", "jpeg")) # plot Lord's χ2 per item for 1PL
plot(difLord2, save.plot = TRUE, save.options = c("difLord2", "default", "jpeg")) # plot Lord's χ2 per item for 2PL
plot(difLord3, save.plot = TRUE, save.options = c("difLord3", "default", "jpeg")) # plot Lord's χ2 per item for 3PL
plot(difLord1, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordA", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordB", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordC", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordD", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordE", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordF", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordG", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordH", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordI", "default", "jpeg"))
plot(difLord1, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLordRace1PLwordJ", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordA", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordB", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordC", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordD", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordE", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordF", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordG", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordH", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordI", "default", "jpeg"))
plot(difLord2, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLordRace2PLwordJ", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="worda", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordA", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordb", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordB", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordc", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordC", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordd", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordD", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="worde", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordE", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordf", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordF", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordg", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordG", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordh", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordH", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordi", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordI", "default", "jpeg"))
plot(difLord3, plot="itemCurve", item="wordj", save.plot = TRUE, save.options = c("ICCdifLordRace3PLwordJ", "default", "jpeg"))
dichoDif(dk[,1:10], group=dk[,12], focal.name=0, method=c("TID","MH","Std", "Logistic", "Lord", "Raju"), model="1PL", correct=FALSE, thrSTD=0.08, thrTID=1, purify=FALSE)
dichoDif(dk[,1:10], group=dk[,12], focal.name=0, method=c("TID","MH","Std", "Logistic", "Lord", "Raju"), model="2PL", correct=FALSE, thrSTD=0.08, thrTID=1, purify=FALSE)
Notice that column 11 (wordsum) is not even specified in the syntax. The reason is that these DIF methods compute the matching total score from the item columns themselves, so the wordsum variable is not needed. Here’s a description of the arguments used:
anchor=c() specifies the item(s) included in your anchor set.
correct asks : should the continuity correction be used? (default is TRUE).
purify asks : should the method be used iteratively to purify the set of anchor items? (default is FALSE).
nrIter sets the maximal number of iterations in the item purification process (default is 10).
thrSTD is the threshold (cut-score) for standardized P-DIF statistic (default is 0.10).
thrTID is the threshold for detecting DIF items with TID method (default is 1.5).
save.output will save your output into a new file in your computer’s folder.
PCA, FA, CFA and MGCFA
Factor Analysis or Principal Component Analysis should be carried out first. We get the pattern of factor loadings to be used in CFA/MGCFA, and we also determine the number of factors to be extracted. In most applications of MGCFA, I don’t see people using parallel analysis to decide how many factors to extract (although it should be noted that it is sensitive to sample size). Sometimes they use the scree plot, but eyeballing this tedious plot is no easy task. And sometimes they use Kaiser's eigenvalue-greater-than-one rule, which is the worst one, since the cut-off is arbitrary and there is no clear delimitation (e.g., a factor with an eigenvalue of 1.01 is considered major and another of 0.99 trivial). A recommended reading is Courtney (2013). Another approach is to look at the theory within the field of study for indications of how many interpretable factors to expect. That may be a better approach because it lets the science drive the statistics rather than the statistics drive the science.
When this step is done, we combine the two covariance matrices (group1+group2) and we assess the model fit of equality in the pattern of loadings, in factor loadings, in means (intercepts) and, finally, in residuals. These are known as configural invariance, metric invariance, scalar invariance, and residual (strict) invariance.
The following example is taken from the input data analyzed by Dolan (2000). This is an attempt at replication. The purpose is to test a model with and without a second-order latent factor.
library(lavaan) # get SEM, CFA, MGCFA programs
library(GPArotation) # get quartimax rotation
library(leaps) # FactoMineR() does not work without leaps installed or loaded
library(FactoMineR) # get PCA() function
library(psych) # get fa.parallel() function
library(nFactors) # parallel() function to determine number of factors to be extracted
library(semTools) # get measurementInvariance() function
black.means<-c(8.09, 7.91, 8.63, 7.86, 7.83, 9.18, 9.12, 8.12, 8.10, 7.70, 7.89, 8.86, 8.39)
black.sd<-c(2.65, 2.92, 2.75, 2.76, 2.53, 3.19, 2.95, 3.03, 3.03, 2.70, 2.96, 2.93, 3.22)
black.cor<-matrix(, nrow=13, ncol=13)
black.cor[lower.tri(black.cor, diag=TRUE)]<-c(1.00, 0.55, 0.53,
0.63, 0.49, 0.43, 0.32, 0.42, 0.29, 0.37, 0.31, 0.21, 0.26,
1.00, 0.46, 0.65, 0.48, 0.34, 0.21, 0.43, 0.36, 0.41, 0.36,
0.26, 0.24, 1.00, 0.52, 0.39, 0.50, 0.30, 0.32, 0.23, 0.40,
0.28, 0.28, 0.22, 1.00, 0.63, 0.41, 0.25, 0.43, 0.36, 0.41,
0.34, 0.28, 0.25, 1.00, 0.35, 0.24, 0.44, 0.38, 0.38, 0.35,
0.26, 0.30, 1.00, 0.43, 0.28, 0.30, 0.35, 0.25, 0.25, 0.28,
1.00, 0.29, 0.26, 0.26, 0.17, 0.25, 0.26, 1.00, 0.37, 0.48,
0.49, 0.16, 0.36, 1.00, 0.37, 0.41, 0.21, 0.32, 1.00, 0.57,
0.43, 0.29, 1.00, 0.39, 0.19, 1.00, 0.18, 1.00)
black.cor[upper.tri(black.cor, diag=TRUE)]<-t(black.cor)[upper.tri(black.cor, diag=TRUE)]
dimnames(black.cor)<-list(c("Information", "Similarities", "Arithmetic", "Vocabulary", "Comprehension", "DigitSpan", "TappingSpan", "PictureCompletion", "PictureArrangement", "BlockDesign", "ObjectAssembly", "Coding", "Mazes"), c("Information", "Similarities", "Arithmetic", "Vocabulary", "Comprehension", "DigitSpan", "TappingSpan", "PictureCompletion", "PictureArrangement", "BlockDesign", "ObjectAssembly", "Coding", "Mazes"))
black.cov<-cor2cov(black.cor, black.sd)
white.means<-c(10.41, 10.29, 10.37, 10.42, 10.44, 10.08, 10.09, 10.41, 10.37, 10.39, 10.73, 10.22, 10.41)
white.sd<-c(2.91, 3.01, 2.84, 2.94, 2.81, 3.00, 2.87, 2.87, 2.91, 2.92, 3.01, 3.30, 3.06)
white.cor<-matrix(, nrow=13, ncol=13)
white.cor[lower.tri(white.cor, diag=TRUE)]<-c(1.00, 0.58, 0.51,
0.66, 0.51, 0.34, 0.25, 0.35, 0.37, 0.44, 0.34, 0.26, 0.22,
1.00, 0.43, 0.63, 0.55, 0.33, 0.19, 0.40, 0.37, 0.45, 0.35,
0.25, 0.24, 1.00, 0.48, 0.40, 0.42, 0.32, 0.30, 0.26, 0.41,
0.23, 0.29, 0.24, 1.00, 0.61, 0.36, 0.24, 0.38, 0.39, 0.43,
0.33, 0.29, 0.21, 1.00, 0.23, 0.19, 0.35, 0.34, 0.38, 0.29,
0.23, 0.23, 1.00, 0.37, 0.16, 0.18, 0.29, 0.17, 0.28, 0.18,
1.00, 0.16, 0.19, 0.27, 0.15, 0.25, 0.19, 1.00, 0.34, 0.47,
0.41, 0.15, 0.29, 1.00, 0.41, 0.37, 0.22, 0.27, 1.00, 0.56,
0.30, 0.39, 1.00, 0.20, 0.31, 1.00, 0.18, 1.00)
white.cor[upper.tri(white.cor, diag=TRUE)]<-t(white.cor)[upper.tri(white.cor, diag=TRUE)]
dimnames(white.cor)<-list(c("Information", "Similarities", "Arithmetic", "Vocabulary", "Comprehension", "DigitSpan", "TappingSpan", "PictureCompletion", "PictureArrangement", "BlockDesign", "ObjectAssembly", "Coding", "Mazes"), c("Information", "Similarities", "Arithmetic", "Vocabulary", "Comprehension", "DigitSpan", "TappingSpan", "PictureCompletion", "PictureArrangement", "BlockDesign", "ObjectAssembly", "Coding", "Mazes"))
white.cov<-cor2cov(white.cor, white.sd)
combined.cor<-list(black=black.cor, white=white.cor)
combined.cov<-list(black=black.cov, white=white.cov)
combined.n<-list(black=305, white=1868)
combined.means<-list(black=black.means, white=white.means)
combined.cov
Mc<-function(object, digits=3){
fit<-inspect(object, "fit")
chisq=unlist(fit["chisq"])
df<-unlist(fit["df"])
n<-object@SampleStats@ntotal
ncp<-max(chisq-df,0)
d<-ncp/(n-1)
Mc=exp((d)*-.5)
Mc # Mcdonald's NCI (function made by Alexander Beaujean)
}
black.ev<-eigen(black.cor) # eigenvalues of the (already computed) correlation matrix
black.ap<-parallel(subject=305,var=ncol(black.cor), rep=100,cent=.05)
black.nS<-nScree(x=black.ev$values, aparallel=black.ap$eigen$qevpea)
plotnScree(black.nS) # how many factors to retain (using Scree plot)
white.ev<-eigen(white.cor)
white.ap<-parallel(subject=1868,var=ncol(white.cor), rep=100,cent=.05)
white.nS<-nScree(x=white.ev$values, aparallel=white.ap$eigen$qevpea)
plotnScree(white.nS) # how many factors to retain (using Scree plot)
fa.parallel(black.cor, n.obs=305) # parallel analysis scree plot of factor analysis and principal component analysis
fa.parallel(white.cor, n.obs=1868) # parallel analysis scree plot of factor analysis and principal component analysis
vssblack=VSS(black.cor, n.obs=305)
vssblack
VSS.plot(vssblack)
vssminresblack=VSS(black.cor, n=8, fm="minres", n.obs=305, plot=TRUE, title="Very Simple Structure for black correlation")
vssminresblack
VSS.plot(vssminresblack)
vssmleblack=VSS(black.cor, n=8, fm="mle", n.obs=305, plot=TRUE, title="Very Simple Structure for black correlation")
vssmleblack
VSS.plot(vssmleblack)
vsspablack=VSS(black.cor, n=8, fm="pa", n.obs=305, plot=TRUE, title="Very Simple Structure for black correlation")
vsspablack
VSS.plot(vsspablack)
vsswhite=VSS(white.cor, n.obs=1868)
vsswhite
VSS.plot(vsswhite)
vssminreswhite=VSS(white.cor, n=8, fm="minres", n.obs=1868, plot=TRUE, title="Very Simple Structure for white correlation")
vssminreswhite
VSS.plot(vssminreswhite)
vssmlewhite=VSS(white.cor, n=8, fm="mle", n.obs=1868, plot=TRUE, title="Very Simple Structure for white correlation")
vssmlewhite
VSS.plot(vssmlewhite)
vsspawhite=VSS(white.cor, n=8, fm="pa", n.obs=1868, plot=TRUE, title="Very Simple Structure for white correlation")
vsspawhite
VSS.plot(vsspawhite)
EFAblack<-factanal(covmat=black.cor, factors=3, rotation="quartimax", n.obs=305) # factanal performs a maximum-likelihood factor analysis
print(EFAblack, digits=3, cutoff=.1) # My preferred cutoff is 0.10
EFAwhite<-factanal(covmat=white.cor, factors=3, rotation="quartimax", n.obs=1868) # factanal performs a maximum-likelihood factor analysis
print(EFAwhite, digits=3, cutoff=.1) # My preferred cutoff is 0.10
PCA(black.cor)
PCA(white.cor)
blackPCA<-princomp(covmat=black.cor) # PCA from the correlation matrix itself (not raw data)
summary(blackPCA)
loadings(blackPCA)
plot(blackPCA, type="lines")
#biplot(blackPCA) # needs raw data, not just a correlation matrix
#blackPCA$scores # needs raw data, not just a correlation matrix
blackPC<-principal(black.cor, nfactors=3, rotate="quartimax")
blackPC
whitePCA<-princomp(covmat=white.cor) # PCA from the correlation matrix itself (not raw data)
summary(whitePCA)
loadings(whitePCA)
plot(whitePCA, type="lines")
#biplot(whitePCA) # needs raw data, not just a correlation matrix
#whitePCA$scores # needs raw data, not just a correlation matrix
whitePC<-principal(white.cor, nfactors=3, rotate="quartimax")
whitePC
firstorder.model<- '
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
'
black.fit <- cfa(firstorder.model, sample.cov = black.cov, sample.nobs = 305)
white.fit <- cfa(firstorder.model, sample.cov = white.cov, sample.nobs = 1868)
black.fit
white.fit
blackCFA<-cfa(firstorder.model, sample.cov=black.cov, sample.nobs=305, sample.mean=black.means, meanstructure=TRUE)
fitMeasures(blackCFA, fit.measures="all")
coef(blackCFA)
summary(blackCFA, fit.measures=TRUE)
fitted(blackCFA)
resid(blackCFA, type="standardized")
whiteCFA<-cfa(firstorder.model, sample.cov=white.cov, sample.nobs=1868, sample.mean=white.means, meanstructure=TRUE)
fitMeasures(whiteCFA, fit.measures="all")
coef(whiteCFA)
summary(whiteCFA, fit.measures=TRUE)
fitted(whiteCFA)
resid(whiteCFA, type="standardized")
combined.firstorder.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
'
configural.fit<-cfa(combined.firstorder.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means)
fitMeasures(configural.fit, fit.measures="all")
Mc(configural.fit)
MI.configural<-modificationIndices(configural.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.configural, mi>4)
metric.fit<-cfa(combined.firstorder.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings"))
fitMeasures(metric.fit, fit.measures="all")
Mc(metric.fit)
MI.metric<-modificationIndices(metric.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.metric, mi>4)
scalar.fit<-cfa(combined.firstorder.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts"))
fitMeasures(scalar.fit, fit.measures="all")
Mc(scalar.fit)
MI.scalar<-modificationIndices(scalar.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.scalar, mi>4)
strict.fit<-cfa(combined.firstorder.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals"))
fitMeasures(strict.fit, fit.measures="all")
Mc(strict.fit)
MI.strict<-modificationIndices(strict.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.strict, mi>4)
measurementInvariance(combined.firstorder.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means)
combined.highorder0.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
'
combined.highorder1.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
VERB ~ 0*1
PERF ~ 0*1
MEM ~ 0*1
g ~ 0*1
'
combined.highorder2.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
VERB ~ 0*1
PERF ~ 0*1
MEM ~ 0*1
'
combined.highorder3.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
PERF ~0*1
MEM ~0*1
'
combined.highorder4.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
VERB ~0*1
MEM ~0*1
'
combined.highorder5.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
VERB ~0*1
PERF ~0*1
'
combined.highorder6.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
MEM ~0*1
'
combined.highorder7.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
VERB ~0*1
'
combined.highorder8.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
g =~ VERB + PERF + MEM
PERF ~0*1
'
highorder1.fit<-cfa(combined.highorder1.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "residuals", "lv.variances"), group.partial=c("g~~g"))
fitMeasures(highorder1.fit, fit.measures="all")
Mc(highorder1.fit)
MI.highorder1<-modificationIndices(highorder1.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder1, mi>4)
highorder2.fit<-cfa(combined.highorder2.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g"))
fitMeasures(highorder2.fit, fit.measures="all")
Mc(highorder2.fit)
MI.highorder2<-modificationIndices(highorder2.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder2, mi>4)
highorder3.fit<-cfa(combined.highorder3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "VERB~~VERB"))
fitMeasures(highorder3.fit, fit.measures="all")
Mc(highorder3.fit)
MI.highorder3<-modificationIndices(highorder3.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder3, mi>4)
highorder4.fit<-cfa(combined.highorder4.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "PERF~~PERF"))
fitMeasures(highorder4.fit, fit.measures="all")
Mc(highorder4.fit)
MI.highorder4<-modificationIndices(highorder4.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder4, mi>4)
highorder5.fit<-cfa(combined.highorder5.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "MEM~~MEM"))
fitMeasures(highorder5.fit, fit.measures="all")
Mc(highorder5.fit)
MI.highorder5<-modificationIndices(highorder5.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder5, mi>4)
highorder6.fit<-cfa(combined.highorder6.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "PERF~~PERF", "VERB~~VERB"))
fitMeasures(highorder6.fit, fit.measures="all")
Mc(highorder6.fit)
MI.highorder6<-modificationIndices(highorder6.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder6, mi>4)
highorder7.fit<-cfa(combined.highorder7.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "PERF~~PERF", "MEM~~MEM"))
fitMeasures(highorder7.fit, fit.measures="all")
Mc(highorder7.fit)
MI.highorder7<-modificationIndices(highorder7.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder7, mi>4)
highorder8.fit<-cfa(combined.highorder8.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("g~~g", "MEM~~MEM", "VERB~~VERB"))
fitMeasures(highorder8.fit, fit.measures="all")
Mc(highorder8.fit)
MI.highorder8<-modificationIndices(highorder8.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.highorder8, mi>4)
# Decomposition of the group difference in g into subtest mean differences.
# lambda.y holds the first-order loadings of the 13 subtests on VERB, PERF, MEM
# (one row per subtest, one column per factor), taken from the higher-order model above.
lambda.y=matrix(c(
0.804, 0.000, 0.232,
0.821, 0.153, 0.000,
0.364, 0.000, 0.835,
1.000, 0.000, 0.000,
0.752, 0.108, 0.000,
0.000, 0.000, 1.289,
0.000, 0.000, 1.000,
0.244, 0.626, 0.000,
0.297, 0.514, 0.000,
0.000, 0.885, 0.454,
0.000, 1.000, 0.000,
0.000, 0.248, 0.774,
0.000, 0.553, 0.338),13,3,byrow=TRUE)
gamma=matrix(c(1.000,.5188,.3835),3,1) # second-order loadings of VERB, PERF, MEM on g
kappa=matrix(c(-2.5929),1,1) # latent mean difference between the groups on g
decomposition.g=lambda.y%*%gamma%*%kappa # contribution of g to each subtest's mean difference
decomposition.g
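To see how much of each observed subtest gap is reproduced by g alone, you can put the model-implied and observed differences side by side. A minimal sketch, assuming combined.means is a list with one mean vector per group and that its subtests are in the same order as the rows of lambda.y:
observed.diff<- combined.means[[1]] - combined.means[[2]] # observed subtest mean differences (assumes two groups)
round(cbind(observed=observed.diff, due.to.g=as.vector(decomposition.g)), 3)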
combined.commonfactor1.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
PERF ~0*1
MEM ~0*1
'
combined.commonfactor2.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
VERB ~0*1
MEM ~0*1
'
combined.commonfactor3.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
VERB ~0*1
PERF ~0*1
'
combined.commonfactor4.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
VERB ~0*1
'
combined.commonfactor5.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
MEM ~0*1
'
combined.commonfactor6.model <-'
VERB =~ Information + Similarities + Arithmetic + Vocabulary + Comprehension + PictureCompletion + PictureArrangement
PERF =~ Similarities + Comprehension + PictureCompletion + PictureArrangement + BlockDesign + ObjectAssembly + Coding + Mazes
MEM =~ Information + Arithmetic + DigitSpan + TappingSpan + BlockDesign + Coding + Mazes
PERF ~0*1
'
commonfactor1.fit<-cfa(combined.commonfactor1.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("VERB~~VERB"))
fitMeasures(commonfactor1.fit, fit.measures="all")
Mc(commonfactor1.fit)
MI.commonfactor1<-modificationIndices(commonfactor1.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor1, mi>4)
commonfactor2.fit<-cfa(combined.commonfactor2.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("PERF~~PERF"))
fitMeasures(commonfactor2.fit, fit.measures="all")
Mc(commonfactor2.fit)
MI.commonfactor2<-modificationIndices(commonfactor2.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor2, mi>4)
commonfactor3.fit<-cfa(combined.commonfactor3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("MEM~~MEM"))
fitMeasures(commonfactor3.fit, fit.measures="all")
Mc(commonfactor3.fit)
MI.commonfactor3<-modificationIndices(commonfactor3.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor3, mi>4)
commonfactor4.fit<-cfa(combined.commonfactor4.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("PERF~~PERF", "MEM~~MEM", "PERF~~MEM"))
fitMeasures(commonfactor4.fit, fit.measures="all")
Mc(commonfactor4.fit)
MI.commonfactor4<-modificationIndices(commonfactor4.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor4, mi>4)
commonfactor5.fit<-cfa(combined.commonfactor5.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("VERB~~VERB", "PERF~~PERF", "VERB~~PERF"))
fitMeasures(commonfactor5.fit, fit.measures="all")
Mc(commonfactor5.fit)
MI.commonfactor5<-modificationIndices(commonfactor5.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor5, mi>4)
commonfactor6.fit<-cfa(combined.commonfactor6.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, meanstructure=TRUE, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("VERB~~VERB", "MEM~~MEM", "VERB~~MEM"))
fitMeasures(commonfactor6.fit, fit.measures="all")
Mc(commonfactor6.fit)
MI.commonfactor6<-modificationIndices(commonfactor6.fit, standardized=TRUE, power=FALSE, delta=0.1, alpha=0.05, high.power=0.75)
subset(MI.commonfactor6, mi>4)
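Rather than scrolling through each fitMeasures() output, you can collect a few indices from the models above into a single table; a minimal sketch using some of the fits estimated above:
fits<- list(highorder1=highorder1.fit, highorder2=highorder2.fit, commonfactor1=commonfactor1.fit, commonfactor4=commonfactor4.fit) # add any other fitted models you want to compare
round(sapply(fits, fitMeasures, fit.measures=c("chisq", "df", "cfi", "rmsea", "srmr", "bic")), 3)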
After MGCFA, bootstrapping is possible, but it is time-consuming with a large N.
PAR.boot<- bootstrapLavaan(chc.weak, R=100, type="ordinary", FUN="coef") # bootstrap model parameters, unstandardized
describeBy(PAR.boot) # describe the bootstrap distribution of the parameters (describeBy is from the psych package)
T.boot<- bootstrapLavaan(chc.weak, R = 100, type = "bollen.stine", FUN = fitMeasures, fit.measures = "chisq") # bootstrap test statistic + compute p-value
T.orig<- fitMeasures(chc.weak, "chisq")
pvalue.boot<- length(which(T.boot > T.orig))/length(T.boot)
pvalue.boot
fit.boot<- bootstrapLavaan(chc.weak, R=100, type="nonparametric", FUN=fitMeasures, fit.measures=c("chisq", "cfi", "rmsea", "srmr", "aic", "bic"), parallel="multicore") # bootstrap fit indices
summary(fit.boot) # describeBy would only display 2 digits after decimals
Shu Fai Cheung wrote a very useful piece, “Bootstrap Confidence Interval for Standardized Solution in lavaan”. Note that bootstrapping in cfa() requires ML, so it is not compatible with sampling.weights, which does not use the plain ML estimator. Also useful, Kim & Millsap (2014) proposed a bootstrap approach, with accompanying code, for evaluating whether approximate fit index values are an acceptable approximation given the degree of misspecification.
library(semhelpinghands) # gets you SE and CI for standardized solution
bf.weak<-cfa(chc.bifactor.weak.model, data=dgroup, group="wb", meanstructure=TRUE, std.lv=T, se="bootstrap", bootstrap=100) # can add group.equal and group.partial
fitMeasures(bf.weak, c("chisq", "df", "pvalue", "cfi", "rmsea", "srmr", "aic", "bic"))
Mc(bf.weak)
ci_boot<- standardizedSolution_boot_ci(bf.weak)
print(ci_boot, output = "text")
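For the Kim & Millsap idea, a much simplified sketch in the same spirit is shown below: it only examines the bootstrap distribution of approximate fit indices under the fitted model (their actual procedure builds a chosen degree of misspecification into the population covariance matrix before resampling, so refer to their code for the full method):
fitindices.boot<- bootstrapLavaan(chc.weak, R=100, type="bollen.stine", FUN=fitMeasures, fit.measures=c("rmsea", "cfi")) # bootstrap distribution of RMSEA and CFI under the model
quantile(fitindices.boot[,1], c(.05, .95)) # first column = rmsea; compare the sample RMSEA to this range
quantile(fitindices.boot[,2], c(.05, .95)) # second column = cfi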
(Last update: July 22nd, 2024)