## ----setup, include = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 80), tidy = TRUE) knitr::opts_chunk$set(comment = NA, warning = FALSE) knitr::opts_chunk$set(fig.width = 9, fig.height = 8) options(width = 1200) # width console output ## First, install the packages, if you have not done this already: #if (!require("restriktor")) install.packages("restriktor") ## Then, load the packages: #library(restriktor) # for the goric function # If you want to use restriktor from github: #if (!require("devtools")) install.packages("devtools") #library(devtools) #install_github("LeonardV/restriktor") #, force = TRUE library(restriktor) # for goric function # NB default iter is 1000 # als testen dan ws iter = iters gebruiken, met: # iters = 30 ## ----echo=FALSE, results = FALSE, message = FALSE, warning = FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- n.coef <- 3 # Number of dummy variables (model without intercept) mu <- rep(0, n.coef) intercept <- 0 f <- .25 #c(0, .1, .25, .4) # According to Cohen 1992 #f2 = r2 / (1-r2) -> r2 = f2 - f2*r2 -> (1-f2)*r2 = f2 #r2 <- f^2 / (1-f^2) # NB correspondeert niet met: 0, .02, .15, .35 # Voor nu zo laten, lijken wel goede voorbeelden samplesize <- 3*40 # c(40, 40, 40) - geeft fout in restr, data ms dan gek? # 40 - geeft nu ineens 40 in totaal b.ratios <- c(3,2,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.9185587 0.6123724 0.3061862 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit <- lm(y ~ 0 + D, data=sample) #fit ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # H1 vs complement - H1 is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c <- goric(fit, hypotheses = list(H1), comparison = "complement") results_1c # Benchmarks based on null benchmarks_1c <- benchmark(results_1c, model_type = "means", iter = 100) #benchmarks_1c # use in R file print(benchmarks_1c, color = FALSE) # use in Rmd file, since Rmd cannot deal with colored text #plot(benchmarks_1c) plot(benchmarks_1c, x_lim = c(0, 57)) plot(benchmarks_1c, x_lim = c(0, 6)) ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot(benchmarks_1c, log_scale = TRUE) ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # H1, H2, and unconstrained (default) - subset/overlap, that is, H1 is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 H2 <- "D1 > D2" # H2: D1 > D2, D3 # mu1 > mu2, mu3 # Apply GORIC # set.seed(123) results_12u <- goric(fit, hypotheses = list(H1, H2)) results_12u round(results_12u$ratio.gw, 3) # Benchmarks benchmarks_12u <- benchmark(results_12u, model_type = "means", iter = 100) #benchmarks_12u # R file print(benchmarks_12u, color = FALSE) # Rmd file plot(benchmarks_12u, output_type = "rgw") plot_out <- plot(benchmarks_12u) # save all plots in object plot_out plot(plot_out$grobs$`H1 vs. H2`) # call separate plot ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- b.ratios <- c(2.5,2.5,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.8838835 0.8838835 0.3535534 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(124) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_border <- lm(y ~ 0 + D, data=sample) #fit_border ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # H1 vs complement - border (nl., mu1 = mu2 > mu3) is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_border <- goric(fit_border, hypotheses = list(H1), comparison = "complement") results_1c_border ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Default null (when using `model_type = "means"`) # Loglik benchmarks based on default null / no effect sizes, that is, # setting all three means equal in the population benchmarks_1c_border <- benchmark(results_1c_border, model_type = "means", iter = 100) # loglik diff #print(benchmarks_1c_border, output_type = "ld") # in R file print(benchmarks_1c_border, output_type = "ld", color = FALSE) # in Rmd file plot(benchmarks_1c_border, output_type = "ld") # ratio loglik weights #print(benchmarks_1c_border, output_type = "rlw") # in R file print(benchmarks_1c_border, output_type = "rlw", color = FALSE) # in Rmd file plot(benchmarks_1c_border, output_type = "rlw", x_lim = c(0, 1.1)) plot(benchmarks_1c_border, output_type = "rlw", log_scale = TRUE) ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_border) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") benchmarks_1c_border_allpos <- benchmark(results_1c_border, pop_est = pop_est, iter = 100) # loglik difference #print(benchmarks_1c_border_allpos, output_type = "ld") # R file print(benchmarks_1c_border_allpos, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos, output_type = "ld") plot(benchmarks_1c_border_allpos, output_type = "ld", x_lim = c(-2.5,2)) # ratio of loglik weights #print(benchmarks_1c_border_allpos, output_type = "rlw") # R file print(benchmarks_1c_border_allpos, output_type = "rlw", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos, output_type = "rlw") plot(benchmarks_1c_border_allpos, output_type = "rlw", x_lim = c(0, 3.6)) plot(benchmarks_1c_border_allpos, output_type = "rlw", log_scale = TRUE) ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Now, group size is 200 (instead of 40) samplesize_orig <- samplesize samplesize <- 3*200 b.ratios <- c(2.5,2.5,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.8838835 0.8838835 0.3535534 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_border_200 <- lm(y ~ 0 + D, data=sample) #fit_border_200 # Return to original value samplesize <- samplesize_orig ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Now, group size is 200 (instead of 40) # H1 vs complement - border (nl., mu1 = mu2 > mu3) is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_border_200 <- goric(fit_border_200, hypotheses = list(H1), comparison = "complement") results_1c_border_200 ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_border_200) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") benchmarks_1c_border_allpos_200 <- benchmark(results_1c_border_200, pop_est = pop_est, iter = 100) # loglik difference #print(benchmarks_1c_border_allpos_200, output_type = "ld") # R file print(benchmarks_1c_border_allpos_200, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos_200, output_type = "ld", x_lim = c(-3, 3)) # ratio of loglik weights #print(benchmarks_1c_border_allpos_200, output_type = "rlw") # R file print(benchmarks_1c_border_allpos_200, output_type = "rlw", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos_200, output_type = "rlw", x_lim = c(0, 3.6)) plot(benchmarks_1c_border_allpos_200, output_type = "rlw", log_scale = TRUE) ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") benchmarks_1c_allpos <- benchmark(results_1c, pop_est = pop_est, iter = 100) # loglik difference #print(benchmarks_1c_allpos, output_type = "ld") # R file print(benchmarks_1c_allpos, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos_200, output_type = "ld", x_lim = c(-5, 3)) # ratio of loglik weights #print(benchmarks_1c_allpos, output_type = "rlw") # R file print(benchmarks_1c_allpos, output_type = "rlw", color = FALSE) # Rmd file plot(benchmarks_1c_allpos, output_type = "rlw", x_lim = c(0, 3.6)) plot(benchmarks_1c_allpos, output_type = "rlw", log_scale = TRUE) ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Now, total sample size is 200 (instead of 40) samplesize_orig <- samplesize samplesize <- 3*200 b.ratios <- c(3,2,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.9185587 0.6123724 0.3061862 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_200 <- lm(y ~ 0 + D, data=sample) #fit_200 # Return to original value samplesize <- samplesize_orig ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Now, group size is 200 (instead of 40) # H1 vs complement H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_200 <- goric(fit_200, hypotheses = list(H1), comparison = "complement") results_1c_200 ## ----message = FALSE, warning = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_200) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") benchmarks_1c_allpos_200 <- benchmark(results_1c_200, pop_est = pop_est, iter = 100) # loglik difference #print(benchmarks_1c_allpos, output_type = "ld") # R file print(benchmarks_1c_allpos_200, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_allpos_200, output_type = "ld", x_lim = c(-6, 4.6)) # ratio of loglik weights #print(benchmarks_1c_allpos_200, output_type = "rlw") # R file print(benchmarks_1c_allpos_200, output_type = "rlw", color = FALSE) # Rmd file plot(benchmarks_1c_allpos_200, output_type = "rlw", x_lim = c(0, 10)) plot(benchmarks_1c_allpos_200, output_type = "rlw", log_scale = TRUE)