R program for ridge GLS estimation

This R program implements ridge generalized least squares (GLS) estimation as described in the paper “Optimizing Ridge Generalized Least Squares for Structural Equation Modeling.”

Authors: Miao Yang and Ke-Hai Yuan

 

#-------------------------------------------------------------------
# R functions for ridge GLS estimation (do not change the code in these functions)
#-------------------------------------------------------------------
# Computing the value for the ridge tuning parameter
a_pred<-function(N,p,m,q,df,sk,kt){
m_sqrt <- sqrt(m)
m_2 <- m^2
m_log_N_2 <- log(m)/sqrt(N)
m_N <- m/N
q_log <- log(q)
q_2_N <- q^2/(N)
df_log <- log(df)
df_2 <- df^2
df_log_logN <- log(df)/log(N)
df_log_N <- log(df)/(N)
df_N <- df/N
df_3_2_N <- (df)^(3/2.0)/(N)
sk_3_2 <- (sk)^(3/2.0)
sk_N <- sk/N
kt_log <- log(kt)
kt_log_logN <- log(kt)/log(N)
kt_sqrt_logN <- sqrt(kt)/log(N)
kt_log_N_2 <- log(kt)/sqrt(N)
kt_log_N <- log(kt)/(N)
kt_sqrt_N <- sqrt(kt)/(N)

cb <- -25.5726+0.0006*N-1.9260*m_sqrt-0.0049*m_2+24.2975*m_log_N_2-28.7679*m_N-
0.0331*q_log+0.0044*q_2_N+2.2866*df_log-(9.490000e-08)*df_2+8.3315*df_log_logN+
11.8695*df_log_N-0.0045*df_N-0.0010*df_3_2_N+0.0070*sk_3_2-732.8503*sk_N+
5.8133*kt_log+73.4353*kt_log_logN+0.0150*kt_sqrt_logN+192.3647*kt_log_N_2+
378.9057*kt_log_N+760.7915*kt_sqrt_N
prob <- exp(cb)/(1+exp(cb)) # logistic transform; renamed from pi to avoid masking R's built-in constant

if (prob > 0.5) {
a <- 0.05
} else { # plain else (rather than else if) guarantees a is assigned when prob equals 0.5
y <- 2.7380-3.9151*log(df)/log(N)-3.1905*log(kt)+6.9506*kt^2/sqrt(N)
a <- exp(y)/(1+exp(y))
}

return(a)
}
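
# For illustration, a_pred can be called directly with the quantities it expects;
# the input values below are hypothetical and only show the interface (in practice
# they are computed from the data and model, as in the user section at the end):
# a_pred(N=300, p=9, m=3, q=21, df=24, sk=0.5, kt=1.2)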

# Computing relative multivariate skewness and kurtosis
Mardia.tran1 <- function(x){
n <- nrow(x)
p <- ncol(x)
x.c <- x-matrix(apply(x,2,mean),nrow=n,ncol=p,byrow=T)

S <- cov(x)*(n-1)/n
S_inv <- solve(S)
D <- as.matrix(x.c)%*%S_inv%*%t(x.c)

b1 <- sum(D^3) # numerator of Mardia's multivariate skewness
b2 <- sum(diag(D^2))/n # Mardia's multivariate kurtosis

df <- p*(p+1)*(p+2)
sk <- b1/(n*df)
kt <- b2/(p*(p+2))

return(list('skew'=sk,'kurt'=kt))
}
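
# A quick sanity check, sketched under the assumption of multivariate normal data:
# the relative skewness should then be close to 0 and the relative kurtosis close to 1.
# set.seed(1)
# z <- matrix(rnorm(1000*4), 1000, 4)
# Mardia.tran1(z) # skew near 0 and kurt near 1 under normality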

# The following produces S_y, the sample estimate of Gamma, the asymptotic covariance matrix of vech(S)
Sy <- function(n,p,x){
ps <- p*(p+1)/2
mean_xt <- apply(x,2,mean)
y <- matrix(0,n,ps)
x_c <- x-matrix(1,n,1)%*%mean_xt
x_ct <- t(x_c)
Scov <- x_ct %*% x_c /n
vscov <- matrixcalc::vech(Scov)
for (i in 1:n){
l <- 0
for (j in 1:p){
for (k in j:p){
l <- l+1
y[i,l] <- x_c[i,k]*x_c[i,j]
}
}
}
vscov_t <- t(vscov)
y <- y-matrix(1,n,1)%*%vscov_t # dim: n by p(p+1)/2
S_y <- t(y)%*%y/n
out <- list('S_y'=S_y, 'vscov'=vscov, 'Scov'=Scov)
return(out)
}
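
# A hedged check of Sy (assuming the matrixcalc and MASS packages are installed):
# for normal data with covariance Sigma, the population Gamma is
# 2*Dp^+ (Sigma %x% Sigma) t(Dp^+), with Dp the duplication matrix, so S_y should
# approach this value in large samples. With Sigma = I it simplifies to 2*Dp^+ t(Dp^+).
# set.seed(123)
# n0 <- 20000; p0 <- 3
# z <- matrix(rnorm(n0*p0), n0, p0) # here Sigma = I
# G_hat <- Sy(n0, p0, z)$S_y
# Dp <- matrixcalc::duplication.matrix(p0)
# Dp_plus <- MASS::ginv(Dp)
# G_normal <- 2 * Dp_plus %*% t(Dp_plus) # 2*Dp^+ (I %x% I) t(Dp^+)
# max(abs(G_hat - G_normal)) # should be small for large n0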

# Evaluate the structured Sigma and the derivative (dSigma/dtheta) for CFA models
# (needs modification if other models are used)
sigdsig <- function(p,m,theta0){
ps <- p*(p+1)/2
p_h <- p/m # indicators per factor; assumes each factor loads on p/m consecutive variables
p2 <- p*2
lamb <- matrix(0,p,m)
for (j in 1:m){
p_hj <- (j-1)*p_h+1
p_hj1 <- j*p_h
lamb[p_hj:p_hj1,j] <- theta0[p_hj:p_hj1]
}

psi_vec <- theta0[(p+1):p2]
phi <- matrix(1,m,m)
k <- 0
for (i in 1:(m-1)){
for (j in (i+1):m){
k <- k+1
phi[j,i] <- theta0[p2+k]
phi[i,j] <- theta0[p2+k]
}
}
Sigma0 <- lamb%*%phi%*%t(lamb) + diag(psi_vec)

# The following evaluates (dSigma0/dtheta)
# DL holds the derivatives with respect to the factor loadings in Lambda
DL <- matrix(0,ps,p)
lambphi <- lamb%*%phi
lambphi_t <- t(lambphi)
for (j in 1:m){
p_hj <- (j-1)*p_h+1
p_hj1 <- j*p_h
for (k in p_hj:p_hj1){
tt <- matrix(0,p,m)
tt[k,j] <- 1
ttt <- tt%*%lambphi_t+lambphi%*%t(tt)
DL[,k] <- matrixcalc::vech(ttt)
}
}
# Dps holds the derivatives with respect to the unique variances in Psi
Dps <- matrix(0,ps,p)
for (j in 1:p){
tt <- matrix(0,p,p)
tt[j,j] <- 1
Dps[,j] <- matrixcalc::vech(tt)
}
# Dphi holds the derivatives with respect to the factor correlations in Phi
ms <- m*(m-1)/2
Dphi <- matrix(0,ps,ms)
k <- 0
for (i in 1:(m-1)){
for (j in (i+1):m){
k <- k+1
tt <- matrix(0,m,m)
tt[j,i] <- 1 ; tt[i,j] <- 1
ttt <- lamb%*%tt%*%t(lamb)
Dphi[,k] <- matrixcalc::vech(ttt)
}
}
vdsig <- cbind(DL,Dps,Dphi)
out <- list('Sigma0'=Sigma0, 'vdsig'=vdsig)
return(out)
}
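
# The analytic derivatives in vdsig can be verified numerically; the sketch below
# uses a hypothetical p = 6, m = 2 model and compares the first column of vdsig
# with a finite-difference approximation:
# p0 <- 6; m0 <- 2
# th <- c(rep(0.7,p0), rep(0.5,p0), 0.3) # loadings, unique variances, factor correlation
# out <- sigdsig(p0, m0, th)
# eps <- 1e-6
# th1 <- th; th1[1] <- th1[1] + eps # perturb the first factor loading
# num <- (matrixcalc::vech(sigdsig(p0,m0,th1)$Sigma0)-matrixcalc::vech(out$Sigma0))/eps
# max(abs(num - out$vdsig[,1])) # near 0 when the analytic derivative is correct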

# Minimize the ridge GLS/ADF discrepancy function using Fisher scoring
minGLS <- function(n,p,m,theta0){
require(matrixcalc)

ps <- p*(p+1)/2
q <- length(theta0)
df <- ps-q

# Note: 'variables' (the raw data) and 'a' (the ridge tuning parameter)
# are taken from the global environment
run.Sy <- Sy(n,p,x=as.matrix(variables))
S_y <- run.Sy$S_y
vscov <- run.Sy$vscov
weight <- a*S_y +(1-a)*diag(ps) # ridge weight matrix: a*S_y + (1-a)*I

ep <- 0.0001 # convergence tolerance
diverg <- 1
estimate_GLS <- NA
test_GLS <- NA
delta <- 0.1
for(i in 1:300){
sig <- sigdsig(p,m, theta0) # gives a list of 2: sig$Sigma0; sig$vdsig
vsig0 <- matrixcalc::vech(sig$Sigma0)
dswe <- t(sig$vdsig) %*% weight
dwd <- dswe %*% sig$vdsig
stdi <- try(solve(dwd),silent=T)
if(inherits(stdi,"try-error")){
diverg <- 1 # information matrix is singular; report nonconvergence
return(list(test_GLS=NA, estimate_GLS=NA, diverg=diverg))
}
eresdu <- vscov-vsig0
dtheta <- stdi %*% dswe %*% eresdu
theta0 <- theta0 + dtheta
delta <- max(abs(dtheta))
if(delta<=ep) break;
}
if(delta<=ep){
diverg <- 0 # converged within 300 iterations
# Test statistics start here
f_rGLS <- t(eresdu) %*% weight %*% eresdu
T_GLS <- (n-1)*f_rGLS
Var <- stdi %*%dswe %*%S_y %*%t(dswe) %*%stdi # sandwich-type covariance matrix
SE <- sqrt(diag(Var)/(n-1))
Vmat <- weight-t(dswe)%*%stdi%*%dswe
VS_y <- Vmat %*% S_y
tVS_y <- sum(diag(VS_y))

VS_y2 <- VS_y %*% VS_y
tVS_y2 <- sum(diag(VS_y2))
cst <- tVS_y/df
T_GLS1 <- T_GLS/cst # rescaled statistic, referred to chi-square with df degrees of freedom
# p-value for the rescaled statistic: 1-pchisq(T_GLS1,df)
cst_1 <- tVS_y2/tVS_y
cst_2 <- (tVS_y*tVS_y)/tVS_y2 # adjusted degrees of freedom corresponding to T_GLS2
T_GLS2 <- T_GLS/cst_1 # mean-and-variance adjusted statistic
test_GLS <- cbind(T_GLS,T_GLS1,T_GLS2)
colnames(test_GLS) <- c("T","T_rescaled","T_adjusted")
estimate_GLS <- cbind(theta0,SE)
colnames(estimate_GLS) <- c("estimate","SE")
}
return(list(test_GLS=test_GLS, estimate_GLS=estimate_GLS, diverg=diverg))
}

#--------------------------------------------------------
# Code that needs the user's modification
#--------------------------------------------------------

library(lavaan) # provides the HolzingerSwineford1939 data
library(matrixcalc) # provides vech()

data(HolzingerSwineford1939) # a dataset for illustration
variables <- HolzingerSwineford1939[,7:15]
n <- nrow(variables) # sample size
p <- ncol(variables) # number of observed variables
m <- 3 # number of latent factors
q <- 2*p+m*(m-1)/2 # number of parameters
ps <- p*(p+1)/2
df <- ps-q # degrees of freedom
mardia <- Mardia.tran1(as.matrix(variables))
sk <- mardia$skew # relative multivariate skewness
kt <- mardia$kurt # relative multivariate kurtosis
a <- a_pred(N=n,p,m,q,df,sk,kt) # predicted value of the optimal ridge tuning parameter

theta0 <- rep(1,q) #starting values

res <- minGLS(n,p,m,theta0) # output: a list with the test statistics,
# the parameter estimates by ridge GLS and their SEs,
# and the convergence indicator (diverg=0 means converged)
res
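
# A sketch of how the returned list might be inspected (assuming the run above
# converged; referring the rescaled statistic to a chi-square distribution with
# df degrees of freedom follows standard practice):
if (res$diverg == 0){
print(round(res$estimate_GLS, 3)) # estimates and standard errors
T_r <- res$test_GLS[1,"T_rescaled"]
print(1 - pchisq(T_r, df)) # approximate p-value for the rescaled test
}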