## ----echo=TRUE, message=FALSE, warning=FALSE---------------------------------- library(graphicalExtremes) library(dplyr) library(ggplot2) library(igraph) ## ----include=FALSE, eval=FALSE------------------------------------------------ # rightAlign <- function(txt){ # paste0('

', txt, '

') # } # now <- Sys.time() # knitr::knit_hooks$set(timeit = function(before, options, envir) { # if (before) { # now <<- Sys.time() # } else { # rightAlign(sprintf("\n_Code execution time: %.3f seconds._\n", Sys.time() - now)) # } # }) ## ----include=FALSE------------------------------------------------------------ par0 <- par() knitr::knit_hooks$set(nomargin = function(before) { if(before){ par0 <<- par(mar=c(0,0,0,0)) } else{ par(par0) } }) knitr::opts_chunk$set( collapse = TRUE, comment = '#>', error = TRUE, fig.align = 'center', timeit = TRUE ) theme_set( theme_bw() + theme( plot.background = element_blank(), legend.background = element_blank(), strip.background = element_rect(fill = "white"), plot.caption = element_text( size = 7.5, hjust = 0, margin = margin(t = 15)), text = element_text(size = 11), axis.ticks = element_blank(), panel.grid.major = element_line(size = 0.25) ) ) ## ----fig.width=6, fig.height=4------------------------------------------------ # Plot the physical locations of measuring stations, indicating flow volume by width plotDanube(useStationVolume = TRUE, useConnectionVolume = TRUE, xyRatio = 1.5) ## ----nomargin=TRUE------------------------------------------------------------ # Plot only the flow graph danube_flow <- getDanubeFlowGraph() plotDanubeIGraph(graph = danube_flow) ## ----fig.show='hold', out.width='50%', fig.align=''--------------------------- # Plot some pairwise scatter plots plotData <- as_tibble(danube$data_clustered) ggplot(plotData) + geom_point(aes(x = X1, y = X2)) ggplot(plotData) + geom_point(aes(x = X22, y = X28)) ## ----------------------------------------------------------------------------- X <- danube$data_clustered Y <- data2mpareto(data = X, p = .8) ## ----------------------------------------------------------------------------- theta <- .5 Ysim <- rmpareto(n = 100, model = "logistic", d = 2, par = theta) ## ----------------------------------------------------------------------------- G <- cbind(c(0, 1.5), c(1.5, 0)) Ysim <- rmpareto(n = 100, model = "HR", par = G) ## ----------------------------------------------------------------------------- chi_hat <- emp_chi(data = X, p = .8) ## ----------------------------------------------------------------------------- chi_hat <- emp_chi(data = Y) ## ----------------------------------------------------------------------------- Gamma_1_hat <- emp_vario(data = X, k = 1, p = 0.8) ## ----------------------------------------------------------------------------- Gamma_hat <- emp_vario(data = X, p = 0.8) Gamma_hat <- emp_vario(data = Y) ## ----out.width='33%', fig.align='', fig.show='hold', nomargin=TRUE------------ # A tree graph, a decomposable graph, and a non-decomposable (cycle) graph plot(graph_from_literal(1--2--3, 2--4)) plot(graph_from_literal(1--2--3--1, 2--4)) plot(graph_from_literal(1--2--3--4--1)) ## ----------------------------------------------------------------------------- set.seed(42) my_model <- generate_random_model(d = 4, graph_type = "tree") Ysim <- rmpareto_tree( n = 100, model = "HR", tree = my_model$graph, par = my_model$Gamma ) theta_vec <- c(.2, .8, .3) Ysim <- rmpareto_tree( n = 100, model = "logistic", tree = my_model$graph, par = theta_vec ) ## ----------------------------------------------------------------------------- # Utility function to plot fitted parameters against true/empirical ones plot_fitted_params <- function(G0, G1, xlab = 'True', ylab = 'Fitted'){ return( ggplot() + geom_point(aes( x = G0[upper.tri(G0)], y = G1[upper.tri(G1)] )) + geom_abline(slope = 1, intercept = 0) + xlab(xlab) + ylab(ylab) ) } ## ----------------------------------------------------------------------------- # Get flow graph from package flow_graph <- getDanubeFlowGraph() # Fit parameter matrix with this graph structure flow_Gamma <- fmpareto_graph_HR(data = Y, graph = flow_graph) # Compute likelihood/ICs and plot parameters flow_loglik <- loglik_HR( data = Y, Gamma = flow_Gamma, graph = flow_graph ) plot_fitted_params(chi_hat, Gamma2chi(flow_Gamma), xlab = 'Empirical') ## ----nomargin=TRUE, fig.align='', fig.show='hold', out.width='50%'------------ # Fit tree grpah to the data emst_fit <- emst(data = Y, method = "vario") # Compute likelihood/ICs, and plot fitted graph, parameters loglik_emst <- loglik_HR( data = Y, Gamma = emst_fit$Gamma, graph = emst_fit$graph ) plotDanubeIGraph(graph = emst_fit$graph) plot_fitted_params(chi_hat, Gamma2chi(emst_fit$Gamma), xlab = 'Empirical') ## ----nomargin=TRUE------------------------------------------------------------ Gamma <- cbind( c(0, 1.5, 1.5, 2), c(1.5, 0, 2, 1.5), c(1.5, 2, 0, 1.5), c(2, 1.5, 1.5, 0) ) Gamma2Sigma(Gamma, k = 1) Gamma2Theta(Gamma) Gamma2graph(Gamma) ## ----nomargin=TRUE------------------------------------------------------------ set.seed(42) my_tree <- generate_random_tree(d = 4) Gamma_vec <- c(.5, 1.4, .8) Gamma_comp <- complete_Gamma(Gamma = Gamma_vec, graph = my_tree) print(Gamma_comp) plot(Gamma2graph(Gamma_comp)) ## ----nomargin=TRUE------------------------------------------------------------ G <- rbind( c(0, 5, 7, 6, NA), c(5, 0, 14, 15, NA), c(7, 14, 0, 5, 5), c(6, 15, 5, 0, 6), c(NA, NA, 5, 6, 0) ) Gamma_comp <- complete_Gamma(Gamma = G) plot(Gamma2graph(Gamma_comp)) ## ----nomargin=TRUE------------------------------------------------------------ set.seed(42) G <- rbind( c(0, 5, 7, 6, 6), c(5, 0, 14, 15, 13), c(7, 14, 0, 5, 5), c(6, 15, 5, 0, 6), c(6, 13, 5, 6, 0) ) my_graph <- generate_random_connected_graph(d = 5, m = 5) Gamma_comp <- complete_Gamma(Gamma = G, graph = my_graph) plot(Gamma2graph(Gamma_comp)) ## ----nomargin=TRUE------------------------------------------------------------ set.seed(42) d <- 10 my_model <- generate_random_model(d = d, graph_type = "decomposable") plot(my_model$graph) Ysim <- rmpareto(n = 100, d = d, model = "HR", par = my_model$Gamma) my_fit <- fmpareto_graph_HR(data = Ysim, graph = my_model$graph, p = NULL) plot_fitted_params(Gamma2chi(my_model$Gamma), Gamma2chi(my_fit)) ## ----nomargin=TRUE------------------------------------------------------------ set.seed(1) d <- 20 my_model <- generate_random_model(d = d, graph_type = "general") plot(my_model$graph) Ysim <- rmpareto(n = 100, d = d, model = "HR", par = my_model$Gamma) my_fit <- fmpareto_graph_HR(data = Ysim, graph = my_model$graph, p = NULL, method = "vario") plot_fitted_params(Gamma2chi(my_model$Gamma), Gamma2chi(my_fit)) ## ----message=FALSE, warning=FALSE--------------------------------------------- # Run eglearn for a suitable list of penalization parameters rholist <- seq(0, 0.1, length.out = 11) eglearn_fit <- eglearn( data = Y, rholist = rholist, complete_Gamma = TRUE ) # Compute the corresponding likelihoods/ICs logliks_eglearn <- sapply( seq_along(rholist), FUN = function(j) { Gamma <- eglearn_fit$Gamma[[j]] if(is.null(Gamma)) return(c(NA, NA, NA)) loglik_HR( data = Y, Gamma = Gamma, graph = eglearn_fit$graph[[j]] ) } ) # Plot the BIC vs rho/number of edges ggplot(mapping = aes(x = rholist, y = logliks_eglearn['bic', ])) + geom_line() + geom_point(shape = 21, size = 3, stroke = 1, fill = "white") + geom_hline(aes(yintercept = flow_loglik['bic']), lty = "dashed") + geom_hline(aes(yintercept = loglik_emst['bic']), lty = "dotted") + xlab("rho") + ylab("BIC") + scale_x_continuous( breaks = rholist, labels = round(rholist, 3), sec.axis = sec_axis( trans = ~., breaks = rholist, labels = sapply(eglearn_fit$graph, igraph::gsize), name = "Number of edges" ) ) ## ----nomargin=TRUE, fig.align='', fig.show='hold', out.width='50%'------------ # Compare fitted chi to empirical one best_index <- which.min(logliks_eglearn['bic', ]) best_Gamma <- eglearn_fit$Gamma[[best_index]] best_graph <- eglearn_fit$graph[[best_index]] plotDanubeIGraph(graph = best_graph) plot_fitted_params(chi_hat, Gamma2chi(best_Gamma), xlab='Empirical')