Introduction to forrest

forrest creates publication-ready forest plots from any data frame that contains point estimates and confidence intervals. A single function, forrest(), handles the full range of use cases — regression model results, subgroup analyses, meta-analyses, dose-response patterns, and more.

The only hard dependency is tinyplot. forrest works with base R data frames, tibbles, and data.tables.

Basic forest plot

The simplest call requires only three column names: estimate, lower, and upper. Here we display adjusted regression coefficients from a linear model predicting systolic blood pressure (SBP).

# One row per predictor: a point estimate with its lower and upper CI bound.
dat <- data.frame(
  predictor = c(
    "Age (per 10 y)",
    "Female sex",
    "BMI (per 5 kg/m\u00b2)",
    "Current smoker",
    "Physically active"
  ),
  estimate = c(0.18, -0.42, 0.11, -0.31, 0.24),
  lower = c(0.05, -0.61, -0.04, -0.52, 0.08),
  upper = c(0.31, -0.23, 0.26, -0.10, 0.40)
)

# Columns are referenced by name (as strings), not as bare symbols.
forrest(
  dat,
  label = "predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  xlab = "Regression coefficient (95% CI)"
)

Section headers from a grouping column

Pass a column name to section to automatically group rows under bold section headers. forrest() inserts a header row wherever the section value changes, indents the row labels within each section, and adds a blank spacer row after each section. No manual data manipulation is required.

# Rows belonging to the same subgroup are adjacent: two "Sex" rows followed
# by three "Age group" rows.
sub_dat <- data.frame(
  subgroup = rep(c("Sex", "Age group"), times = c(2, 3)),
  label = c(
    "Female", "Male",
    "30\u201349 years", "50\u201369 years", "70+ years"
  ),
  estimate = c(-0.38, 0.12, 0.22, -0.15, -0.41),
  lower = c(-0.58, -0.08, 0.02, -0.38, -0.66),
  upper = c(-0.18, 0.32, 0.42, 0.08, -0.16)
)

# `section` names the grouping column; `header` is the label-column heading.
forrest(
  sub_dat,
  section = "subgroup",
  label = "label",
  header = "Subgroup",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  xlab = "Regression coefficient (95% CI)"
)

Use section_indent = FALSE to suppress automatic indentation, and section_spacer = FALSE to suppress the blank row after each section.

Two-level hierarchy with subsection

For analyses with a nested grouping structure, combine section and subsection. forrest() inserts top-level bold headers for section changes and indented sub-headers for subsection changes within each section.

# Two grouping levels: `domain` (outer) and `type` (inner). Run lengths are
# spelled out with rep() so the nesting structure is visible at a glance.
nested_dat <- data.frame(
  domain = rep(
    c("Physical environment", "Social environment"),
    times = c(5, 4)
  ),
  type = rep(
    c("Air quality", "Urban form", "Support", "Deprivation"),
    times = c(2, 3, 2, 2)
  ),
  predictor = c(
    # Physical environment / Air quality
    "PM2.5 (per 10 \u03bcg/m\u00b3)", "NO2 (per 10 ppb)",
    # Physical environment / Urban form
    "Green space (%)", "Walkability", "Noise (per 10 dB)",
    # Social environment / Support
    "Social cohesion", "Social isolation",
    # Social environment / Deprivation
    "Area deprivation", "Employment rate"
  ),
  estimate = c(
    -0.18, 0.12, -0.22, 0.15, -0.08,
    -0.11, 0.09, 0.05, -0.03
  ),
  lower = c(
    -0.38, -0.08, -0.42, -0.05, -0.28,
    -0.31, -0.09, -0.12, -0.20
  ),
  upper = c(
    0.02, 0.32, -0.02, 0.35, 0.12,
    0.09, 0.27, 0.22, 0.14
  )
)

# `section` names the outer grouping column, `subsection` the inner one.
forrest(
  nested_dat,
  section = "domain",
  subsection = "type",
  label = "predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  xlab = "Mean difference in SBP (mmHg, 95% CI)"
)

Adding a summary (pooled) estimate

Pass the name of a logical column to is_summary; rows where that column is TRUE are drawn as filled diamonds instead of squares. This is useful for pooled estimates in meta-analyses or for overall effects after subgroup rows.

# `is_sum` flags the pooled row; only "Overall" is marked TRUE.
sex_dat <- data.frame(
  label = c("Female", "Male", "Overall"),
  estimate = c(-0.42, -0.29, -0.36),
  lower = c(-0.61, -0.48, -0.50),
  upper = c(-0.23, -0.10, -0.22),
  is_sum = c(FALSE, FALSE, TRUE)
)

# `is_summary` takes the name of the logical flag column.
forrest(
  sex_dat,
  label = "label",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  is_summary = "is_sum",
  title = "Association of female sex with SBP by subgroup",
  xlab = "Regression coefficient (95% CI)"
)

Group colouring

Pass a group column to colour estimates by a categorical variable. A legend is added automatically using the Okabe-Ito colorblind-safe palette.

# Ten rows: the same five predictor labels listed once per domain.
grp_dat <- data.frame(
  predictor = rep(
    c(
      "Air pollution (PM2.5)",
      "Noise exposure",
      "Green space access",
      "Walkability index",
      "Food environment"
    ),
    times = 2
  ),
  domain = rep(
    c("Physical environment", "Social environment"),
    each = 5
  ),
  estimate = c(
    -0.18, 0.12, -0.22, 0.15, -0.08,
    0.05, -0.03, 0.09, -0.11, 0.14
  ),
  lower = c(
    -0.38, -0.08, -0.42, -0.05, -0.28,
    -0.12, -0.20, -0.09, -0.31, -0.04
  ),
  upper = c(
    0.02, 0.32, -0.02, 0.35, 0.12,
    0.22, 0.14, 0.27, 0.09, 0.32
  )
)

# `group` names the categorical column used for colouring.
forrest(
  grp_dat,
  label = "predictor",
  group = "domain",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  xlab = "Mean difference in SBP (mmHg, 95% CI)"
)

Multiple estimates per row (dodge)

Set dodge = TRUE (or a positive number) when consecutive rows share the same label value. The CIs are vertically offset within each label band, and the label is displayed once at the group centre. Combine with group to colour the series.

# Long format: each exposure occupies two consecutive rows, one per period
# (Childhood first, then Adulthood).
dodge_dat <- data.frame(
  exposure = rep(
    c(
      "PM2.5 (per 10 \u03bcg/m\u00b3)",
      "NO2 (per 10 ppb)",
      "Noise (per 10 dB)",
      "Green space (%)",
      "Walkability"
    ),
    each = 2
  ),
  period = rep(c("Childhood", "Adulthood"), times = 5),
  estimate = c(
    0.14, -0.05,
    0.08, 0.12,
    -0.19, -0.06,
    0.11, -0.03,
    0.07, 0.10
  ),
  lower = c(
    -0.10, -0.26,
    -0.09, -0.08,
    -0.40, -0.25,
    -0.05, -0.12,
    -0.14, -0.09
  ),
  upper = c(
    0.38, 0.16,
    0.25, 0.32,
    0.02, 0.13,
    0.27, 0.06,
    0.28, 0.29
  )
)

# Consecutive rows sharing an `exposure` label are dodged; `group` colours
# the two periods.
forrest(
  dodge_dat,
  label = "exposure",
  header = "Environmental exposure",
  group = "period",
  dodge = TRUE,
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  ref_line = 0,
  xlab = "Mean difference in SBP (mmHg, 95% CI)"
)

A numeric value for dodge sets the vertical spacing between rows within a group directly (in y-axis units). dodge = TRUE uses the default of 0.25. Structural rows (section and subsection headers, spacers) are always treated as singleton groups and are not affected by dodging.

Wide-format text columns alongside dodged CIs

By default, cols text values appear at each row’s dodged y position, keeping them aligned with their CI whiskers. Set cols_by_group = TRUE to collapse each text column to one value per label group — this produces a wide table with one row per label and one column per condition, matching the layout commonly seen in multi-period epidemiology papers.

# Build one formatted "estimate (lower, upper)" string per row, then spread
# it into one text column per period. Rows from the other period get "" so
# each column carries a single non-empty value per exposure.
dodge_dat[["est_ci"]] <- with(
  dodge_dat,
  sprintf("%.2f (%.2f, %.2f)", estimate, lower, upper)
)
dodge_dat[["text_child"]] <- with(
  dodge_dat,
  ifelse(period == "Childhood", est_ci, "")
)
dodge_dat[["text_adult"]] <- with(
  dodge_dat,
  ifelse(period == "Adulthood", est_ci, "")
)

# `cols` maps display headers to the text columns created above;
# `cols_by_group = TRUE` collapses them to one value per label group.
forrest(
  dodge_dat,
  label = "exposure",
  header = "Environmental exposure",
  group = "period",
  dodge = TRUE,
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  cols = c(
    "Childhood (95% CI)" = "text_child",
    "Adulthood (95% CI)" = "text_adult"
  ),
  cols_by_group = TRUE,
  widths = c(2.8, 3.5, 2.2, 2.2),
  ref_line = 0,
  xlab = "Mean difference in SBP (mmHg, 95% CI)"
)

Point shapes

Pass a shape column to assign different point characters per category. Use together with group and dodge to encode two categorical dimensions at once — for example, colour = time period and shape = sex.

# Four rows per exposure: period (Childhood, Adulthood) crossed with sex
# (Female, Male), with sex varying fastest.
shape_dat <- data.frame(
  exposure = rep(
    c(
      "PM2.5 (per 10 \u03bcg/m\u00b3)",
      "NO2 (per 10 ppb)",
      "Noise (per 10 dB)",
      "Green space (%)",
      "Walkability"
    ),
    each = 4
  ),
  period = rep(
    rep(c("Childhood", "Adulthood"), each = 2),
    times = 5
  ),
  sex = rep(c("Female", "Male"), times = 10),
  # Each line below holds the four rows of one exposure.
  estimate = c(
    0.22, -0.08, -0.10, 0.18,
    0.11, 0.05, 0.00, 0.14,
    -0.31, 0.06, -0.09, -0.02,
    0.08, 0.12, -0.04, 0.03,
    0.17, -0.06, 0.22, 0.01
  ),
  lower = c(
    -0.20, -0.38, -0.28, -0.14,
    -0.12, -0.22, -0.22, -0.14,
    -0.56, -0.24, -0.38, -0.30,
    -0.18, -0.14, -0.22, -0.15,
    -0.09, -0.38, -0.04, -0.27
  ),
  upper = c(
    0.64, 0.22, 0.08, 0.50,
    0.34, 0.32, 0.22, 0.42,
    -0.06, 0.36, 0.20, 0.26,
    0.34, 0.38, 0.14, 0.21,
    0.43, 0.26, 0.48, 0.29
  )
)

# Colour encodes `period` (via group); point shape encodes `sex`.
forrest(
  shape_dat,
  label = "exposure",
  group = "period",
  shape = "sex",
  dodge = TRUE,
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  ref_line = 0,
  xlab = "Mean difference in SBP (mmHg, 95% CI)"
)

The shape legend appears at legend_shape_pos ("bottomright" by default). Set legend_shape_pos = NULL to suppress it.

Adding text columns

Use the cols argument — a named character vector mapping display headers to column names in data — to show formatted statistics alongside the plot.

# Numeric columns drive the plot; `coef_ci` and `pval` are display-only text.
tc_dat <- data.frame(
  predictor = c("Age (per 10 y)", "Female sex", "BMI (per 5 kg/m\u00b2)",
                "Current smoker", "Physically active"),
  estimate  = c( 0.18, -0.42,  0.11, -0.31,  0.24),
  lower     = c( 0.05, -0.61, -0.04, -0.52,  0.08),
  upper     = c( 0.31, -0.23,  0.26, -0.10,  0.40)
)

# Derive the CI text from the numeric columns instead of typing it by hand,
# so the printed values can never drift from the plotted ones. The "% .2f"
# space flag reserves a sign slot, which keeps positive and negative numbers
# the same width.
tc_dat$coef_ci <- sprintf(
  "% .2f (% .2f, % .2f)",
  tc_dat$estimate, tc_dat$lower, tc_dat$upper
)
tc_dat$pval <- c("0.006", "<0.001", "0.148", "0.003", "0.009")

# `cols` maps each display header to a text column in `tc_dat`.
forrest(
  tc_dat,
  label = "predictor",
  header = "Predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  cols = c(
    "Coef (95% CI)" = "coef_ci",
    "P-value" = "pval"
  ),
  widths = c(2.8, 4, 2.5, 1.2),
  xlab = "Regression coefficient (95% CI)"
)

Section-level annotations in text columns

When section is active, text columns show "" for section header rows by default. Use section_cols — a named character vector with the same syntax as cols — to populate specific columns in section header rows with a section-level value (e.g. number of studies, total N). The name must match a name in cols; the value is a column in data whose first non-NA entry in each section is used.

# Subgroup rows with numeric estimates; text columns are added afterwards.
sc_dat <- data.frame(
  subgroup  = c("Sex",    "Sex",   "Age group", "Age group", "Age group"),
  label     = c("Female", "Male",  "30\u201349 y", "50\u201369 y", "70+ y"),
  estimate  = c(-0.38,  0.12,  0.22, -0.15, -0.41),
  lower     = c(-0.58, -0.08,  0.02, -0.38, -0.66),
  upper     = c(-0.18,  0.32,  0.42,  0.08, -0.16)
)

# Derive the CI text from the numeric columns instead of typing it by hand,
# so the printed values can never drift from the plotted ones. The "% .2f"
# space flag reserves a sign slot, which keeps positive and negative numbers
# the same width.
sc_dat$coef_ci <- sprintf(
  "% .2f (% .2f, % .2f)",
  sc_dat$estimate, sc_dat$lower, sc_dat$upper
)

# Constant within each section — section header will show this value
sc_dat$k_text <- c("k = 2", "k = 2", "k = 3", "k = 3", "k = 3")

# The "k" entry appears in both `cols` and `section_cols`: the names match,
# so that column is also filled in on section header rows.
forrest(
  sc_dat,
  section = "subgroup",
  label = "label",
  header = "Subgroup",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  cols = c(
    "Coef (95% CI)" = "coef_ci",
    "k" = "k_text"
  ),
  section_cols = c("k" = "k_text"),
  widths = c(2.5, 4, 2.5, 1.2),
  xlab = "Regression coefficient (95% CI)"
)

Alternating row stripes

Set stripe = TRUE for a subtle alternating row background that aids readability with many rows.

# Eight rows, enough for the alternating background to be visible.
stripe_dat <- data.frame(
  label = c(
    "Age (per 10 y)",
    "Female sex",
    "BMI (per 5 kg/m\u00b2)",
    "Current smoker",
    "Physically active",
    "Alcohol intake",
    "Sleep duration",
    "Depressive symptoms"
  ),
  estimate = c(0.42, -0.18, 0.31, -0.07, 0.25, -0.12, 0.19, -0.34),
  lower = c(0.22, -0.38, 0.12, -0.24, 0.06, -0.30, 0.01, -0.52),
  upper = c(0.62, 0.02, 0.50, 0.10, 0.44, 0.06, 0.37, -0.16)
)

# `stripe = TRUE` turns on the alternating row background.
forrest(
  stripe_dat,
  label = "label",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  stripe = TRUE,
  xlab = "Regression coefficient (95% CI)"
)

Themes

forrest() ships three built-in themes. Pass the theme name as a character string, or supply a named list of style overrides for full control.

# Small three-row data set reused by every theme example below.
theme_dat <- data.frame(
  predictor = c(
    "Age (per 10 y)",
    "Female sex",
    "BMI (per 5 kg/m\u00b2)"
  ),
  estimate = c(0.18, -0.42, 0.11),
  lower = c(0.05, -0.61, -0.04),
  upper = c(0.31, -0.23, 0.26)
)

# Built-in "minimal" theme: lighter gridlines and a softer reference line.
forrest(
  theme_dat,
  label = "predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  theme = "minimal",
  title = 'theme = "minimal"',
  xlab = "Coefficient (95% CI)"
)

# Built-in "classic" theme: dotted gridlines and a solid black reference line.
forrest(
  theme_dat,
  label = "predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  theme = "classic",
  title = 'theme = "classic"',
  xlab = "Coefficient (95% CI)"
)

# Custom theme: a named list overrides individual style keys.
forrest(
  theme_dat,
  label = "predictor",
  estimate = "estimate",
  lower = "lower",
  upper = "upper",
  theme = list(
    ref_col = "#e63946",
    ref_lty = 1L,
    grid_col = "#eeeeee"
  ),
  title = "Custom theme (red reference line)",
  xlab = "Coefficient (95% CI)"
)

Saving plots

Use save_forrest() to export a plot to PDF, PNG, SVG, or TIFF. Pass the output path as file and, as plot, a zero-argument function that calls forrest().

# `plot` receives a zero-argument function wrapping the forrest() call; it
# reuses the `dat` data frame from the basic example.
save_forrest(
  file = "my_forest_plot.pdf",
  width = 8,
  height = 5,
  plot = function() {
    forrest(
      dat,
      label = "predictor",
      estimate = "estimate",
      lower = "lower",
      upper = "upper",
      xlab = "Regression coefficient (95% CI)"
    )
  }
)