#' Compute intermediate ranked retrieval results per group
#'
#' Compute intermediate ranked retrieval results per group such as Discounted
#' Cumulative Gain (DCG), Ideal Discounted Cumulative Gain (IDCG), Normalised
#' Discounted Cumulative Gain (NDCG) and Label Ranking Average Precision (LRAP).
#'
#' @param gold_vs_pred A data.frame as generated by \code{create_comparison},
#'   additionally containing a column \code{"score"}.
#' @param grouping_var A character vector of grouping variables that must be
#'   present in \code{gold_vs_pred}.
#' @inheritParams option_params
#'
#' @return A data.frame with columns \code{"dcg", "idcg", "ndcg", "lrap"}.
#' @export
#'
#' @examples
#'
#' library(casimir)
#'
#' gold <- tibble::tribble(
#'   ~doc_id, ~label_id,
#'   "A", "a",
#'   "A", "b",
#'   "A", "c",
#'   "A", "d",
#'   "A", "e",
#' )
#'
#' pred <- tibble::tribble(
#'   ~doc_id, ~label_id, ~score,
#'   "A", "f", 0.3277,
#'   "A", "e", 0.32172,
#'   "A", "b", 0.13517,
#'   "A", "g", 0.10134,
#'   "A", "h", 0.09152,
#'   "A", "a", 0.07483,
#'   "A", "i", 0.03649,
#'   "A", "j", 0.03551,
#'   "A", "k", 0.03397,
#'   "A", "c", 0.03364
#' )
#'
#' gold_vs_pred <- create_comparison(gold, pred)
#'
#' compute_intermediate_results_rr(
#'   gold_vs_pred,
#'   rlang::syms(c("doc_id"))
#' )
compute_intermediate_results_rr <- function( # nolint styler: off
    gold_vs_pred,
    grouping_var,
    drop_empty_groups = options::opt("drop_empty_groups")) {
  stopifnot(all(c("doc_id", "score", "gold") %in% colnames(gold_vs_pred)))

  my_rep_len <- function(x, length.out, empty_value) {
    if (is.na(length.out)) {
      empty_value
    } else {
      base::rep_len(x, length.out)
    }
  }

  gold_vs_pred |>
    dplyr::group_by(!!!grouping_var, .drop = drop_empty_groups) |>
    dplyr::arrange(dplyr::desc(score)) |>
    dplyr::mutate(n_gold = sum(gold), n_pred = sum(suggested)) |>
    dplyr::mutate(
      discount = log2(seq_len(dplyr::n()) + 1),
      gain = gold & suggested,
      ideal = c(
        my_rep_len(1, n_gold[1], NA_integer_),
        my_rep_len(0, dplyr::n() - n_gold[1], integer(0))
      ),
      score_tp = dplyr::if_else(gold & suggested, score, NA_real_),
      L = rank(-score_tp, ties.method = "max"),
      rank = rank(-score, ties.method = "max")
    ) |>
    dplyr::summarise(
      dcg = sum(gain / discount),
      idcg = sum(ideal / discount),
      ndcg = dplyr::if_else(idcg > 0, dcg / idcg, 1.0),
      lrap = sum((gold & suggested) * L / rank) / n_gold[1]
    )
}
