# Computes Cliff's independent d-statistic for the independent case.
# main functions are cliff_independent and weighted.cliff_independent

#' Computes the independent groups d-statistic comparing the two vectors provided.
#'
#' @param x vector of scores for first group
#' @param y vector of scores for second group
#' @return list containing d, SE(d) and z(d)
#' @export
Cliff_independent <- function(x, y) {
  n <- length(x)
  m <- length(y)
  d = matrix(nrow = n, ncol=m)

  for (i in 1:n) {
    for (j in 1:m) {
      d[i, j] = sign(x[i] - y[j])
    }
  }
  Cliff_independent_from_matrix(d)
}


#' Computes independent group's d-statistic from the matrix of frequencies provided.
#'
#' Computes intermediate d-matrix, so can be slow for large N
#' @param n matrix of counts
#' @return list containing d, SE(d) and z(d)
#' @export
Cliff_independent_from_table <- function(n) {
  n_cells <- length(n)
  x <- vector("double", n_cells)
  y <- vector("double", n_cells)
  w <- vector("double", n_cells)
  index <- 1
  for (i in 1:nrow(n)) {
    for (j in 1:ncol(n)) {
      x[index] <- i
      y[index] <- j
      w[index] <- n[i, j]
      index <- index + 1
    }
  }
  Cliff_independent_weighted(x, w, y, w)
}


#' Computes d-statistic from dominance matrix provided.
#'
#' @importFrom stats var
#' @param d N X M dominance matrix
#' @return list containing d, SE(d) and z(d)
#' @export
Cliff_independent_from_matrix <- function(d) {
  d_i = rowSums(d) / ncol(d)
  d_bar <- mean(d_i)

  d_j <- colSums(d) / nrow(d)

  sigma_squared_dj <- var(d_j)
  sigma_squared_di <- var(d_i)
  n = nrow(d)
  m = ncol(d)
  sigma_squared_dij <- sum((d - d_bar)^2) / ((n - 1) * (m - 1))

  se_squared  <- m * sigma_squared_di / (n * (m - 1)) +
      n * sigma_squared_dj / (m * (n - 1)) -
      sigma_squared_dij / (n * m)
  se <- sqrt(se_squared)

  min_se <- (1 - d_bar^2) / (m * n - 1)
  if (is.na(se_squared) || (se_squared < min_se)) {
    se_squared <- min_se
  }

  list(d_bar=d_bar, sigma_d_bar=se, z_d_bar=d_bar / se)
}


#' Computes d-statistic based on scores and integer weights(frequencies) for each group.
#'
#' @param x first vector of scores
#' @param w_x weights associated with first vector of scores
#' @param y second vector of scores
#' @param w_y weights associated with second vector of scores
#' @return list containing d, SE(d) and z(d)
#' @export
Cliff_independent_weighted <- function(x, w_x, y, w_y) {
  # eliminate observations with nonpositive weight
  nonzero_w_x <- w_x > 0
  nonzero_w_y <- w_y > 0
  x <- x[nonzero_w_x]
  w_x <- w_x[nonzero_w_x]
  y <- y[nonzero_w_y]
  w_y <- w_y[nonzero_w_y]

  n <- length(x)
  m <- length(y)

  if (n == 0 || m == 0) {
    return(list(d_bar=NA, sigma_d_bar= NA, z_d_bar=NA))
  }

  d = matrix(nrow=n, ncol=m)
  wd = matrix(nrow=n, ncol=m)
  w_ij = matrix(nrow=n, ncol=m)

  index <- 0
  for (i in 1:n) {
    for (j in 1:m) {
      d[i, j] = sign(x[i] - y[j])
      wd[i, j] = d[i, j] * w_x[i] * w_y[j]
      index <- index + 1
      w_ij[index] <- w_x[i] * w_y[j]
    }
  }

  d_j <- colSums(wd) / (w_y * sum(w_x))
  d_i <- rowSums(wd) / (w_x * sum(w_y))
  d_bar <- sum(wd) / (sum(w_x) * sum(w_y))

  sigma_squared_dj <- weighted_var(d_j, w_y)
  sigma_squared_di <- weighted_var(d_i, w_x)

  w1Sum = sum(w_x)
  w2Sum = sum(w_y)
  sigma_squared_dij <- weighted_var(d - d_bar, w_ij) *
    (sum(w_ij) - 1) / ((w1Sum - 1) * (w2Sum- 1))
  sigma_squared_dij_hat <- weighted_var(d - d_bar, w_ij)

  se_squared  <- w2Sum * sigma_squared_di / (w1Sum * (w2Sum - 1)) +
    w1Sum * sigma_squared_dj / (w2Sum* (w1Sum - 1)) -
    sigma_squared_dij / (w1Sum * w2Sum)

  se_squared_alt <- ((m - 1) * sigma_squared_di +
                       (n - 1) * sigma_squared_dj +
                       sigma_squared_dij_hat) / (n * m)

  min_se <- (1 - d_bar^2) / (w1Sum * w2Sum - 1)
  if (is.na(se_squared) || (se_squared < min_se)) {
    se_squared <- min_se
  }

  se <- sqrt(se_squared)
  z <- d_bar / se
  list(d_bar=d_bar, sigma_d_bar= se, z_d_bar=z)
}
