# Fits selected of McCullagh (1980) ordinal regression models.


#' Fit location model
#'
#' @param n matrix of observed counts
#' @param x design matrix for regression model
#' @param max_iter maximum number of Fisher scoring iterations
#' @param verbose logical: should cycle-by-cycle info be printed out?
#' Default value is FALSE, do not print
#' @returns a list containing
#'    beta: regression parameter estimates
#'    se: matrix of estimated standard errors
#'    cov: covariance matrix of parameter estimates
#'    g_squared: G^2 likelihood ratio chi-square for model
#'    chisq: Pearson chi-square for model
#'    df: degrees of freedom
#' @export
McCullagh_fit_location_regression_model <- function(n, x, max_iter=5, verbose=FALSE) {
  s <- nrow(n)
  c <- ncol(n)
  y <- 1:c
  n_param <- ncol(x) - (c - 1)
  t <- 1
  v <- n_param
  beta <- McCullagh_initialize_beta(n, c, n_param)

  gamma <- McCullagh_compute_gamma(x, beta, s, c)
  phi <- matrix(0.0, nrow=s, ncol=c - 1)
  for (i in 1:s) {
    for (j in 1:(c - 1)) {
      phi[i, j] <- McCullagh_compute_phi(gamma[i,], j)
    }
  }

  # test of fit of dumping data from Agresti (1984).
  logL <- McCullagh_compute_log_l(n, phi)
  if (verbose) {
    message(paste(0, " ", logL, "\n"))
  }

  for (iter in 1:max_iter) {
    g <- McCullagh_derivative_log_l_wrt_beta(n, x, gamma)
    h <- McCullagh_second_order_log_l_wrt_beta_2(n, x, gamma)
    update <- solve(h, g)

    beta <- beta - update
    gamma <- McCullagh_compute_gamma(x, beta, s, c)

    phi <- matrix(0.0, nrow=s, ncol=c - 1)
    for (i in 1:s) {
      for (j in 1:(c - 1)) {
        phi[i, j] <- McCullagh_compute_phi(gamma[i,], j)
      }
    }

    logL <- McCullagh_compute_log_l(n, phi)
    if (verbose) {
      message(paste(iter, " ", logL, "\n"))
    }
  }

  cov <- solve(-h)
  se <- sqrt(diag(cov))

  result <- McCullagh_compute_pi_from_beta(n, x, beta)
  N <- sum(n)
  marginal_pi <- rowSums(n) / N
  pi <- result$pi * marginal_pi
  g_squared <- likelihood_ratio_chisq(n, pi)
  chisq <- pearson_chisq(n, pi)
  df <- nrow(x) - ncol(x)
  list(beta=beta, se=se, cov=cov, g_squared=g_squared, chisq=chisq, df=df)
}


#' Computes cumulative sums for rows,
#'
#' @param n matrix of observed counts
#' @returns R where R[i, ] contains cumulative sum of n[i,]
McCullagh_compute_cumulative_sums <- function(n) {
  R <- matrix(0.0, nrow=nrow(n), ncol = ncol(n))
  for (i in 1:nrow(n)) {
    R[i, ] <- cumsum(n[i,])
  }
  R
}


#' Computes lambda, log of cumulative odds.
#'
#' @param n matrix of observed counts
#' @param use_half logical whether of not to add half to the cell count before taking
#' the logit. Default value is TRUE.
McCullagh_compute_lambda <- function(n, use_half=TRUE) {
  lambda <- matrix(0.0, nrow=nrow(n), ncol=ncol(n) - 1)
  R <- McCullagh_compute_cumulative_sums(n)
  for (i in 1:nrow(n)) {
    n_i <- R[i, ncol(R)]
    lambda[i, ] <- McCullagh_logits(R[i, ], use_half=use_half)
  }
  lambda
}


#' Computes regression weights w;
#' R_dot_j * (N - R_dot_j[j])  * (n_do_j[j] a= na_dot_j[j+ 1] )
#'
#' @param n matrix of observed counts
#' @returns list of w, and sum(w)
#' @export
McCullagh_compute_regression_weights <- function(n) {
  n_dot_dot <- sum(n)
  R <- McCullagh_compute_cumulative_sums(n)
  R_dot_j <- colSums(R)
  n_dot_j <- colSums(n)
  w <- vector("double", ncol(n))
  for (j in 1:(ncol(n) - 1)) {
    w[j] <- R_dot_j[j] * (n_dot_dot - R_dot_j[j]) * (n_dot_j[j] + n_dot_j[j + 1])
  }
  sum_w <- sum(w)
  w <- w / sum(w)
  list(w=w, sum_w=sum_w)
}


#' Computed cumulative logits.
#'
#' @param cumulative vector of cumulative counts
#' @param use_half logical indicting whether or not to add 0.5 to numerator and denominator
#'counts before computing logits,  Default value is TRUE, add 0.5.
McCullagh_logits <- function(cumulative, use_half=TRUE) {
  k = length(cumulative)
  p <- k - 1
  N <- cumulative[k]
  result <- vector("double", p)
  for (j in 1:p) {
    if (use_half) {
      result[j] = log((cumulative[j] + 0.5) / (N - cumulative[j] + 0.5))
    } else {
      result[j] = log((cumulative[j] ) / (N - cumulative[j] ))
    }
  }
  result
}


#' Computes Z, where z is w * lambda.
#'
#' @param lambda cumulative logits
#' @param w weights to apply to the logits
#' @returns z, sum pf product of lambda %*% w
McCullagh_compute_z <- function(lambda, w) {
  z <- vector("double", nrow(lambda))
  for (i in 1:nrow(lambda)) {
    z[i] <- 0.0
    for (j in 1:ncol(lambda)) {
      z[i] <- z[i] + w[j] * lambda[i, j]
    }
  }
  z
}


#' Computes the proportional hazards.
#'
#' @param n matrix of observed counts
#' @returns loga(-log(survival))
#' @export
McCullagh_proportional_hazards <- function(n) {
  cumulatives <- McCullagh_compute_cumulative_sums(n)
  N <- sum(n)
  gamma <- cumulatives / (N / 2.0)
  survivals <- 1.0 - gamma

  result <- matrix(0.0, nrow=nrow(n), ncol=ncol(n) - 1)
  for (i in 1:nrow(survivals)) {
    for (j in 1:(ncol(survivals) - 1)) {
      result[i, j] <- log(-log(survivals[i, j]))
    }
  }
  result
}


#' Computes the log(likelihood) for the general nonlinear model.
#'
#' @param n matrix of observed counts
#' @param phi vector of model-based parameters
#' @returns log(likelihood)
#' @export
McCullagh_compute_log_l <- function(n, phi) {
  N <- sum(n)
  R <- McCullagh_compute_cumulative_sums(n)
  n_bar <- rowSums(n)
  z <- R / n_bar

  g <- matrix(0.0, nrow=nrow(phi), ncol=ncol(phi))
  for (i in 1:nrow(phi)) {
    for (j in 1:ncol(phi)) {
      g[i, j] <- log(1.0 + exp(phi[i, j]))
    }
  }

  logL <- 0.0
  for (i in 1:nrow(phi)) {
    increment <- 0.0
    for (j in 1:ncol(phi)) {
      increment <- increment + z[i, j] * phi[i, j] - z[i, j + 1] * g[i, j]
    }
    logL <- logL + n_bar[i] * increment
  }
  logL
}


#' Computes phi based on gamma
#'
#' @param gamma vector of gamma parameters
#' @param j index of phi to compute
#' @returns phi[j]
McCullagh_compute_phi <- function(gamma, j) {
  log(gamma[j] / (gamma[j + 1] - gamma[j]))
}


#' Derivative of log(likelihood) wrt phi[i, j]
#'
#' @param n matrix of observed counts
#' @param phi matrix of phi-values
#' @param i row index of phi
#' @param j column index of phi
#' @returns derivative
McCullagh_derivative_log_l_wrt_phi <- function(n, phi, i, j) {
  R <- McCullagh_compute_cumulative_sums(n)
  n_bar <- rowSums(n)
  z <- R / n_bar

  der <- n_bar[i] * (z[i, j] - z[i, j + 1] * expit(phi[i, j]))
}


#' Derivative of phi wrt gamma.
#'
#' @param gamma vector of gamma values
#' @param j index of gamma for which to compute the derivative
#' @returns derivative
McCullagh_derivative_phi_wrt_gamma <- function(gamma, j) {
  (1.0 / gamma[j]) * (1.0 + gamma[j] / (gamma[j + 1] - gamma[j]))
}


#' Derivative of y wrt gamma.
#'
#' Assumes a logit link is being used.
#' @param gamma matrix of gamma values
#' @param i row index of gamma
#' @param j column index of gamma
#' @returns derivative
McCullagh_derivative_gamma_wrt_y <- function(gamma, i, j) {
  g <- gamma[i, j]
  g * (1.0 - g)
}


#' Computes gamma from x and beta
#'
#' @param x predictor variables
#' @param beta vector of regression coefficients
#' @param s number of rows in the table
#' @param c number of score levels in table
#' @returns vector of model-based gamma coefficients
McCullagh_compute_gamma <- function(x, beta, s, c) {
  g <- x %*% beta
  gamma <- matrix(0.0, nrow=s, ncol=c)
  index <- 1
  for (i in 1:s) {
    for (j in 1:(c - 1)) {
      gamma[i, j] <- expit(g[index])
      index <- index + 1
    }
    gamma[i, c] <- 1.0
  }
  gamma
}


#' Compute the cell probabilities pi from gamma.
#'
#' @param gamma matrix of gamma values
#' @returns c X c matrix of p-values pi
McCullagh_compute_pi_from_gamma <- function(gamma) {
  epsilon = 1.0e-12
  pi <- matrix(0.0, nrow=nrow(gamma), ncol=ncol(gamma))
  for (i in 1:nrow(gamma)) {
    for(j in 1:(ncol(gamma) - 1)) {
      pi[i, j + 1] <- gamma[i, j + 1] - gamma[i, j]
      if (pi[i, j + 1] < epsilon) {
        pi[i, j + 1] <- epsilon
      }
    }
    pi[i, 1] <- gamma[i, 1]
  }
  pi
}


#' Initializes the beta vector.
#'
#' @param n matrix of observed counts
#' @param c number of score levels in table
#' @param v number of levels of beta beyond c
#' @returns initialized beta vector
#' @export
McCullagh_initialize_beta <- function(n, c, v) {
  N <- sum(n)
  R <- McCullagh_compute_cumulative_sums(n)
  r_bar <- colSums(R) / N

  beta <- vector("double", (c - 1) + v)
  for (j in 1:(c - 1)) {
    beta[j] <- log((r_bar[j] / (1.0 - r_bar[j])))
  }
  if (0 < v) {
    beta[c] <- 0.0
  }
  beta
}


#' Computes matrix of p-values pi based on x and current value of beta.
#'
#' @param n matrix of observed counts
#' @param x design matrix
#' @param beta current values of location model regression parameters
#' @returns matrix of model-based pi values
McCullagh_compute_pi_from_beta <- function(n, x, beta) {
  N <- sum(n)
  n_bar <- rowSums(n)
  L <- matrix(x %*% beta, nrow=nrow(n), byrow=TRUE)
  fPrime = expit(L)
  s <- nrow(n)
  c <- ncol(n)
  m <- matrix(0.0, nrow=s, ncol=c)
  f <- matrix(1.0, nrow=s, ncol=c)
  for (i in 1:s) {
    f[i, 1:(c - 1)] <- fPrime[i, 1:(c - 1)]
  }

  for (i in 1:s) {
    for (j in 1:c) {
      if (j == 1) {
        m[i, j] <- f[i, j] * n_bar[i]
      } else {
        m[i, j] <- (f[i, j] - f[i, j - 1]) * n_bar[i]
      }
    }
  }
  pi <- m / n_bar
  list(pi=pi, m=m)
}


#' Derivative of log(likelihood) wrt beta, as given in appendix of McCullagh.
#'
#' McCullagh, P. (1980). Regression  models for ordinal data. Journal of the Royal Stastical
#' Society,  Series B, 42(2), 109-142.
#' With assist from appendix of Agresti, (1984).
#' Agresti, A. (1984). Analysis of ordinal categorical data.  New York, Wiley, p. 244-246.
#' @param n matrix of observed counts
#' @param x design matrix for location
#' @param gamma matrix of model-based cumulative logits
#' @returns derivative
McCullagh_derivative_log_l_wrt_beta <- function(n, x, gamma) {
  R <- McCullagh_compute_cumulative_sums(n)
  n_bar <- rowSums(n)
  z <- R / n_bar

  p <- ncol(gamma) - 1  # score levels (columns) in table - 1
  q <- vector("double", ncol(x))
  for (k in 1:ncol(x)) {  # over predictors beta[k]
    q[k] <- 0.0
    for (i in 1:nrow(gamma)) {  # number of rows in table
      q_i <- 0.0
      for (j in 1:p) {  # number of column/category
        j1 <- (i - 1) * p + j
        if (j1 < nrow(x)) {
          a_ijk <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, k] - gamma[i, j] * (1.0 - gamma[i, j + 1]) * x[j1 + 1, k]
        } else {
          a_ijk <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, k]
        }
        q_i <- q_i + (z[i, j] * gamma[i, j + 1] - z[i, j + 1] * gamma[i, j]) * a_ijk * McCullagh_v_inverse(gamma, i, j) / gamma[i, j + 1]
      }
      q[k] <- q[k] + n_bar[i] * q_i
    }
  }
  q
}


#' Expected values of second order derivatives of log(likelihood) wrt beta.
#'
#' Appendix of McCullagh, P. (1980). Regression models for ordinal data. Journal of the Royal
#' Statistical Society, Series B, 42(2), 109-142. and appendix B3 of Agresti, A. (1984). Analysis
#' of ordinal categorical data, New York, Wiley, p. 242-244.
#'
#' @param n matrix of observed counts
#' @param x design matrix for location model
#' @param gamma current value of model-based cumulative logits.
#' @returns matrix of second order partial derivatives
McCullagh_second_order_log_l_wrt_beta_2 <- function(n, x, gamma) {
  N <- sum(n)
  R <- McCullagh_compute_cumulative_sums(n)
  n_bar <- rowSums(n)
  z <- R / n_bar

  c <- ncol(gamma) - 1
  p <- ncol(x)
  h <- matrix(0.0, nrow=p, ncol=p)
  for (r in 1:p) {
    for (s in 1:p) {
      h[r, s] <- 0.0
      for (i in 1:nrow(gamma)) {
        h_i <- 0.0
        for (j in 1:c) {
          j1 <- (i - 1) * c + j
          if (j1 < nrow(x)) {
            a_ijr <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, r] - gamma[i, j] * (1.0 - gamma[i, j + 1]) * x[j1 + 1, r]
            a_ijs <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, s] - gamma[i, j] * (1.0 - gamma[i, j + 1]) * x[j1 + 1, s]
          } else {
            a_ijr <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, r]
            a_ijs <- gamma[i, j] * (1.0 - gamma[i, j]) * x[j1, s]
          }
          h_i <- h_i + McCullagh_v_inverse(gamma, i, j) * a_ijr * a_ijs
        }
        h[r, s] <- h[r, s] - n_bar[i] * h_i
      }
    }
  }
  h
}


#' Derivative of gamma wrt phi.
#'
#' Version given in McCullagh isn't right.
#' @param gamma vector of cumulative logits
#' @param j index of derivative sought
#' @param phi scalar phi taking derivative wrt
#' @returns derivative
McCullagh_derivative_gamma_wrt_phi <- function(gamma, j, phi) {
  gamma[j] * (1.0 - expit(phi))
}


#' Derivative of gamma j + 1 wrt phi.
#'
#' @param gamma vector
#' @param j index of gamma to take derivative of
#' @param phi scalar phi taking derivative wrt
#' @returns derivative
McCullagh_derivative_gamma_plus_1_wrt_phi <- function(gamma, j, phi) {
  gamma[j] - gamma[j + 1]
}


#' Computes value of gamma from phi. Inverse of usual computation.
#'
#' @param phi value to compute from
#' @param j index to use in computation
#' @param gamma vector of gamma values (model-based cumulative logits)
#' @return gamma[j] given phi and gamma[j + 1]
McCullagh_compute_gamma_from_phi <- function(phi, j, gamma) {
  gamma[j + 1] * expit(phi)
}


#' Computes value of gamma[j + 1] from phi.
#'
#' @param phi value used in computation
#' @param j index to use in computation
#' @param gamma vector of gamma values (model-based cumulative logits)
#' @return gamma[j + 1] given phi and gamma[j]
McCullagh_compute_gamma_plus_1_from_phi <- function(phi, j, gamma) {
  gamma[j] / expit(phi)
}


#' Compute v_inverse (from appendix).
#'
#' @param gamma matrix of cumulative logits
#' @param i row index
#' @param j column index
#' @returns V^(-1) : d phi / d gamma[i, j]
McCullagh_v_inverse <- function(gamma, i, j) {
  gamma[i, j + 1] /  (gamma[i, j] * (gamma[i, j + 1] - gamma[i, j]))
}



#' Derivative of log(likelihood) wrt parameters.
#'
#' @param n matrix of observed counts
#' @param x design matrix for location model
#' @param beta vector of regression parameters for location model
#' @returns gradient vector
McCullagh_derivative_log_l_wrt_params <- function(n, x, beta) {
  n_beta <- length(beta)
  s <- nrow(n)
  c <- ncol(n)
  gamma <- McCullagh_compute_gamma(x, beta, s, c)

  g <- McCullagh_derivative_log_l_wrt_beta(n, x, gamma)
  g
}


#' Expected second order derivatives of log(likelihood)
#'
#' @param n matrix of observed counts
#' @param x design matrix for location model
#' @param beta vector of regression parameters for location model
#' @returns matrix of expected second derivatives
McCullagh_second_order_log_l_wrt_parms <- function(n, x, beta) {
  n_beta <- length(beta)
  n_param <- n_beta
  s <- nrow(n)
  c <- ncol(n)
  gamma <- McCullagh_compute_gamma(x, beta, s, c)
  phi <- McCullagh_compute_phi_matrix(gamma)
  h <- McCullagh_second_order_log_l_wrt_beta_2(n, x, gamma)
  h
}


#' Initialize design matrix for location model.
#'
#' This is the simplest possible implementation, that fits thresholds
#' and a single group contrast.  More complex problems will implement
#' the matrix X themselves.
#' @param s number of levels of stratification variable
#' @param c number of score levels
#' @param v number of predictors above thresholds
#' @returns design matrix X
#' @export
McCullagh_initialize_x <- function(s, c, v) {
  x <- matrix(0.0, nrow=s * (c - 1), ncol=(c - 1) + v)
  index <- 1
  for (u in 1:s) {
    for (j in 1:(c - 1)) {
      x[index, j] <- 1.0
      index <- index + 1
    }
  }

  if (0 < v) {
    target <- c - 1 + v
    u_bar <- (s + 1) / 2
    index <- 1
    for (u in 1:s) {
      for (j in 1:(c - 1)) {
        x[index,  target] <- u - u_bar
        index <- index + 1
      }
    }
  }
  x
}


#' Compute matrix of model-based logits
#'
#' @param gamma matrix of model-based cumulative odds
#' @returns matrix of model-based logits
McCullagh_compute_phi_matrix <- function(gamma) {
  s <- nrow(gamma)
  c <- ncol(gamma)
  phi <- matrix(0.0, nrow=s, ncol=c - 1)

  for (i in 1:s) {
    for (j in 1:(c - 1)) {
      phi[i, j] <- McCullagh_compute_phi(gamma[i, ], j)
    }
  }
  phi
}
