Johnson-Neyman for meta analysis
In applied research, we typically look at moderation effect of a moderator on the relationship between a predictor and an outcome variable. However, looking at the interaction term alone is very misleading. Hence, researcher typically employ the Johnson-Neyman technique in probing the moderation effect. This technique helps find the region where the predictor effect is significant.
Even though interactions
package in R is very versatile that it can work with pretty much every model for probing the interaction effect, it couldn’t play well with metafor
package (for more details on moderation). Hence, we have to write a custom function for this purpose.
A little reminder of the moderation effect
\[ Y = b_1 X+ b_2 W + b_3 XW \]
Johnson-Neyman finds the values of the moderators \(W\) such that the p-value of the conditional effect of the predictor is equal to the level of significance \(\alpha\)
\[ t_{crit} = \frac{b_1 + b_3 W}{\sqrt{s^2_{b1} + 2 Ws_{b_1b_3} + W^2 s^2_{b_3}}} \]
The solution for \(W\) is
\[ W = \frac{-2(t_{crit}^2 s_{b_1b_3} - b_1b_3)\pm \sqrt{(2t^2_{crit}s_{b_1b_3}- 2b_1 b_3)^2-4(t_{crit}^2s^2_{b_3} - b_3^2)(t_{crit}^2 s^2_{b_1}-b_1^2)}}{2(t^2_{crit}s^2_{b_3}-b^2_3)} \]
To implement the custom function, I model the code after Marko Bachl
To get the Johnson-Neyman signficance area
jnt = function(.rma.mv, predictor, moderator, alpha = .05) {
require(stringi)
library(tidyverse)
# get coefficient for the predictor
b1 = .rma.mv$b[predictor, 1]
# get coefficient for the interaction term
b3 = .rma.mv$b[paste0(predictor, ":", moderator), 1]
# standard error for the predictor term
se_b1 = .rma.mv$se[which(rownames(summary(.rma.mv)$b) == predictor)]
# standard error for the interaction term
se_b3 = .rma.mv$se[which(rownames(summary(.rma.mv)$b) == paste0(predictor, ":", moderator))]
# covariance for the predictor and the interaction term
COV_b1b3 = .rma.mv$vb[which(rownames(summary(.rma.mv)$b) == predictor), which(rownames(summary(.rma.mv)$b) == paste0(predictor, ":", moderator))]
# the t-critical value
t_crit = qt(1 - alpha / 2, df.residual(.rma.mv))
# see Bauer & Curran, 2005
a = t_crit ^ 2 * se_b3 ^ 2 - b3 ^ 2
b = 2 * (t_crit ^ 2 * COV_b1b3 - b1 * b3)
c = t_crit ^ 2 * se_b1 ^ 2 - b1 ^ 2
jn = c((-b - sqrt(b ^ 2 - 4 * a * c)) / (2 * a),
(-b + sqrt(b ^ 2 - 4 * a * c)) / (2 * a))
JN = sort(unname(jn))
# see whether it's inside the two numbers or outside
if (length(JN) != 0){
# randomly pick a number between the two cutoffs
test_num <- runif(1, JN[1], JN[2])
test_theta = b1 + test_num * b3
test_se_theta = sqrt(se_b1 ^ 2 + 2 * test_num * COV_b1b3 + test_num ^ 2 * se_b3 ^ 2)
test.ci.lo_theta = test_theta - qt(1 - alpha / 2, df.residual(.rma.mv)) * test_se_theta
test.ci.hi_theta = test_theta + qt(1 - alpha / 2, df.residual(.rma.mv)) * test_se_theta
if (0 %in% c(test.ci.lo_theta:test.ci.hi_theta)){
print("inside the range")
} else {
print("outside the range")
}
}
return(JN)
rm(.rma.mv)
}
If the results are 0, 1, 2, then the whole range of the predictors are significant.
theta_plot = function(.rma.mv,
predictor,
moderator,
alpha = .05,
jn = F) {
require(dplyr)
require(ggplot2)
theme_set(theme_minimal())
require(stringi)
.data = tibble(
# get coefficient for the predictor
b1 = .rma.mv$b[predictor, 1],
# get coefficient for the interaction term
b3 = .rma.mv$b[paste0(predictor, ":", moderator), 1],
Z = quantile(.rma.mv$X[, moderator], seq(0, 1, .01)),
theta = b1 + Z * b3,
# standard error for the predictor term
se_b1 = .rma.mv$se[which(rownames(summary(.rma.mv)$b) == predictor)],
# standard error for the interaction term
se_b3 = .rma.mv$se[which(rownames(summary(.rma.mv)$b) == paste0(predictor, ":", moderator))],
# covariance for the predictor and the interaction term
COV_b1b3 = .rma.mv$vb[which(rownames(summary(.rma.mv)$b) == predictor), which(rownames(summary(.rma.mv)$b) == paste0(predictor, ":", moderator))],
se_theta = sqrt(se_b1 ^ 2 + 2 * Z * COV_b1b3 + Z ^ 2 * se_b3 ^ 2),
ci.lo_theta = theta - qt(1 - alpha / 2, df.residual(.rma.mv)) * se_theta,
ci.hi_theta = theta + qt(1 - alpha / 2, df.residual(.rma.mv)) * se_theta
)
if (jn) {
JN = jnt(
.rma.mv = .rma.mv,
predictor = predictor,
moderator = moderator,
alpha = alpha
)
JN_lines = geom_vline(xintercept = JN, linetype = 2)
JN_regions = ifelse(length(JN) == 0,
"no significance regions",
paste(round(JN, 2), collapse = "; "))
Xlab = paste0(moderator, " (JN Significance Regions: ", JN_regions, ")")
} else {
Xlab = moderator
JN_lines = NULL
}
.data %>%
ggplot(aes(x = Z, y = theta)) +
geom_ribbon(aes(ymin = ci.lo_theta, ymax = ci.hi_theta), fill = "grey70") +
geom_line() +
ggtitle(paste(
"Conditional Effect of",
predictor,
"as function of",
moderator
)) +
geom_hline(yintercept = 0, linetype = 2) +
labs(x = Xlab, y = expression(theta)) +
JN_lines +
xlim(min(.rma.mv$X[, moderator]), max(.rma.mv$X[, moderator]))
}
Let’s try it on a model
Note: both the moderator and the predictor must be continuous
library(metafor)
mod <- rma.mv(yi, vi, mods = ~ year * pubstatus, random = ~ 1 | study/esid, data= dat.assink2016)
summary(mod)
##
## Multivariate Meta-Analysis Model (k = 100; method: REML)
##
## logLik Deviance AIC BIC AICc
## -66.6051 133.2102 145.2102 160.5963 146.1540
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1143 0.3381 17 no study
## sigma^2.2 0.1138 0.3373 100 no study/esid
##
## Test for Residual Heterogeneity:
## QE(df = 96) = 447.7313, p-val < .0001
##
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 9.8164, p-val = 0.0202
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 0.6985 0.2621 2.6655 0.0077 0.1849 1.2121 **
## year -0.0928 0.0464 -1.9990 0.0456 -0.1837 -0.0018 *
## pubstatus -0.3308 0.2826 -1.1704 0.2418 -0.8847 0.2231
## year:pubstatus 0.0608 0.0499 1.2170 0.2236 -0.0371 0.1586
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
jnt(mod, predictor = "year", moderator = "pubstatus")
## [1] "outside the range"
## [1] -0.02071889 0.94999771
theta_plot(mod, predictor = "year", moderator = "pubstatus", jn = T)
## [1] "outside the range"
Then we interpret that the impact of year on the outcome variable is only significant when the publication status is greater than 0.95 or less than -0.02 (but this is not possible because the lower limit is 0).