Fix all measures depedent on the H matrix

ef336b53 · wbonat · 59e71b95 · ef336b53
Commit ef336b53 authored 9 years ago by wbonat
--- a/R/mc_influence.R
+++ b/R/mc_influence.R
-#' Influence measures
+#' @title Influence measures for McGLMs
 #'
-#' @description Compute influence measures for multivariate covariance generalized linear models.
-#' Leverage, DFBETA and Cook's distance for unit sample and observations.
+#' @description Compute influence measures for multivariate covariance
+#' generalized linear models. Leverage, DFBETA and Cook's distance
+#' for unit sample and observations.
+#' @param object An object of \code{mcglm} class.
+#' @param id a vector which identifies the clusters.
+#' The length and order of id should match with the number of
+#' observations.
+#' @return A list with influence measures for cluster and observations.
 #'
-#' @param object An object of mcglm class.
-#' @param id a vector which identifies the clusters. The length and order of id should be the
-#' same as the number of observations. Data are assumed to be sorted so that observations on a cluster
-#' are contiguous rows for all entities in the formula.
-#' @return A matrix. Note that the function assumes that the data are in the correct order.
+#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
 #' @export

 mc_influence <- function(object, id) {
+  temp_data <- data.frame(object$residuals, id)
+  data_id <- split(temp_data, temp_data$id)
+  n_id <- length(data_id)
+  D <- bdiag(lapply(object$mu_list, function(x) x$D))
+  n_beta <- dim(D)[2]
+  R <- bdiag(lapply(data_id, function(x) {
+    tcrossprod(x[, 1])
+  }))
+  # Leverage for observations
  inv_M <- -object$inv_S_beta
  M <- solve(inv_M)
-    temp_data <- data.frame(res = object$residuals, id)
-    temp_data_group <- split(temp_data, temp_data$id)
-    D <- bdiag(lapply(object$mu_list, function(x) x$D))
-    tD_invC <- t(D) %*% object$inv_C
-    H <- D %*% inv_M %*% tD_invC
-    leverage_obs <- diag(H)
-    leverage_group <- tapply(leverage_obs, id, sum)
-    I <- Diagonal(object$n_obs, 1)
-    n_group <- length(temp_data_group)
-    indexes <- matrix(NA, ncol = 2, nrow = n_group)
-    n_obs_group <- table(id)
-    indexes[1, ] <- c(1, as.numeric(n_obs_group[1]))
-    DFBETA_clust <- list()
-    D_temp <- D[c(indexes[1, 1]:indexes[1, 2]), ]
-    inv_C_temp <- object$inv_C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
-    C_temp <- object$C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
-    H_temp <- H[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
-    res_temp <- object$residuals[indexes[1, 1]:indexes[1, 2]]
-    DFBETAOij <- list()
-    padroniza <- function(x, M) {
-        as.numeric((t(as.numeric(x)) %*% M %*% as.numeric(x))/dim(M)[1])
-    }
-    dfbetaOij <- function(D_temp, C_temp, inv_C_temp, res_temp) {
-        DFBETA_temp <- matrix(NA, ncol = dim(D_temp)[2], nrow = dim(D_temp)[1])
-        k = 1
-        for (j in 1:length(res_temp)) {
-            Dij <- D_temp[j, k] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% D_temp[-j, ]
-            rij <- res_temp[j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% res_temp[-j]
-            Vij <- C_temp[k, j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% C_temp[-j, k]
-            Hij <- try(Dij %*% object$inv_S_beta %*% t(Dij) %*% solve(Vij), silent = TRUE)
-            if (class(Hij) == "try-error") {
-                DFBETA_temp[j, ] <- NA
+  uni_id <- unique(id)
+  Hi <- list()
+  Di <- list()
+  inv_Ci <- list()
+  for (i in 1:n_id) {
+    idTF <- id == uni_id[i]
+    if(sum(idTF) == 1) {
+      Di[[i]] <- Matrix(D[idTF,], nrow = sum(idTF), ncol = dim(D)[2])
+    } else {
+      Di[[i]] <- D[idTF,]
    }
-            if (class(Hij) != "try-error") {
-                DFBETA_temp[j, ] <- as.numeric(t(object$inv_S_beta %*% t(Dij) %*% (rij/(Vij - (1 - Hij)))))
+    inv_Ci[[i]] <- object$inv_C[idTF,idTF]
+    Hi[[i]] <- Di[[i]]%*%inv_M%*%t(Di[[i]])%*%inv_Ci[[i]]
  }
+  H <- bdiag(Hi)
+  # Leverage for observations
+  Hobs <- diag(H)
+  # Leverage for clusters
+  Hid <- tapply(Hobs, id, sum)
+  # DFBETAs for clusters
+  I <- Diagonal(dim(temp_data)[1], 1)
+  inv_IH <- solve(I - H)
+  DFBETACi <- list()
+  DFBETAOij <- list()
+  DCLSi <- c()
+  for(i in 1:n_id) {
+    idTF <- id == uni_id[i]
+    if(sum(idTF) == 1) {
+      D_i <- Matrix(D[idTF,], nrow = sum(idTF), ncol = n_beta)
+      inv_C_i <- Matrix(object$inv_C[idTF,idTF],ncol = 1, nrow = 1)
+    } else {
+      D_i <- D[idTF,]
+      inv_C_i <- object$inv_C[idTF,idTF]
    }
-        return(DFBETA_temp)
+    inv_IH_i = inv_IH[idTF,idTF]
+    r_i <- object$residuals[idTF]
+        DFBETACi[[i]] <- t(inv_M%*%t(D_i)%*%inv_C_i%*%inv_IH_i%*%r_i)
+    DCLSi[i] <- as.numeric(DFBETACi[[i]]%*%M%*%t(DFBETACi[[i]]))/n_beta
+    if(dim(inv_C_i)[1] == 1) {DFBETAOij[[i]] <- DFBETACi[[i]]
+    } else {
+    DFBETAOij[[i]] <- mc_dfbetaOij(Di = D_i, Ci = object$C[idTF,idTF],
+                                   inv_Ci = inv_C_i, ri = r_i,
+                                   inv_M = inv_M)
    }
-    DFBETA_clust[[1]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp
-    DFBETAOij[[1]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp)
-    for (i in 2:n_group) {
-        indexes[i, ] <- c(indexes[i - 1, ][2] + 1, n_obs_group[i] + indexes[i - 1, ][2])
-        D_temp <- D[c(indexes[i, 1]:indexes[i, 2]), ]
-        inv_C_temp <- object$inv_C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
-        C_temp <- object$C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
-        H_temp <- H[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
-        res_temp <- object$residuals[indexes[i, 1]:indexes[i, 2]]
-        D_temp <- matrix(D_temp, nrow = n_obs_group[i])
-        DFBETA_clust[[i]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp
-        if (n_obs_group[i] == 1) {
-            DFBETAOij[[i]] <- t(as.matrix(DFBETA_clust[[i]]))
  }
-        if (n_obs_group[i] != 1) {
-            DFBETAOij[[i]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp)
+  DFBETACi <- do.call(rbind, DFBETACi)
+  DFBETAOij <- do.call(rbind, DFBETAOij)
+  DCOij = do.call(rbind, apply(DFBETAOij, MARGIN = 1,
+                               function(x,M,n_beta)t(x)%*%M%*%x/n_beta,
+                               M = M, n_beta = n_beta))
+  std.error <- coef(object, std.error = TRUE)$Std.error
+  DFBETACi = data.frame(as.matrix(DFBETACi))/std.error
+  DFBETAOij = data.frame(as.matrix(DFBETAOij))/std.error
+  names(DFBETACi) <- do.call(c, object$beta_names)
+  names(DFBETAOij) <- do.call(c, object$beta_names)
+  output_id <- data.frame(DFBETACi, "Leverage" = as.numeric(Hid),
+                          "Cook" = as.numeric(DCLSi))
+  output_obs <- data.frame(DFBETAOij, "Leverage" = as.numeric(Hobs),
+                          "Cook" = as.numeric(DCOij))
+  output <- list("Id" = output_id, "Obs" = output_obs)
+  return(output)
 }
+
+#' @title Influence measures for McGLMs
+#'
+#' @description Compute influence measures for multivariate covariance
+#' generalized linear models. Leverage, DFBETA and Cook's distance
+#' for observations. Auxiliar function for \code{mc_influence}.
+#' @param Di D matrix for the cluster i.
+#' @param Ci C matrix for the cluster i.
+#' @param inv_Ci Inverse of C matrix for the cluster i.
+#' @param ri Residual vector for the cluster i.
+#' @param inv_M Inverse of variance/covariance of regression parameters.
+#' @return Matrix with the DFBETA for observation in the cluster i.
+#'
+#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
+#' @export
+
+mc_dfbetaOij <- function(Di, Ci, inv_Ci, ri, inv_M) {
+  nobs <- dim(inv_Ci)[1]
+  nbeta <- dim(Di)[2]
+  DFBETAOij <- matrix(NA, ncol = nbeta, nrow = nobs)
+  for (i in 1:nobs) {
+    Ci.jj <- Matrix(Ci[-i,i])
+    Cij.j <- t(Ci.jj)
+    inv_Ci.j <- inv_Ci[-i,-i]
+    Di.j <- Di[-i,]
+    Dij_temp <- Di[i,] - Cij.j%*%inv_Ci.j%*%Di.j
+    rij_temp <- as.numeric(ri[i] - Cij.j%*%inv_Ci.j%*%ri[-i])
+    Cij_temp <- as.numeric(Ci[i,i] - Cij.j%*%inv_Ci.j%*%t(Cij.j))
+    Hij_temp <- as.numeric(Dij_temp%*%inv_M%*%t(Dij_temp)%*%solve(Cij_temp))
+    part2 <- Matrix(rij_temp/(Cij_temp*(1-Hij_temp)))
+    DFBETAOij[i,] <- as.numeric(t(inv_M%*%t(Dij_temp)%*%part2))
  }
-    DFBETA <- lapply(DFBETA_clust, as.matrix)
-    DFBETA <- lapply(DFBETA, t)
-    DFBETA <- plyr::ldply(DFBETA, data.frame)
-    names(DFBETA) <- object$beta_names[[1]]
-    DFBETAOij <- plyr::ldply(DFBETAOij, data.frame)
-    names(DFBETAOij) <- object$beta_names[[1]]
-    DCLSi <- apply(as.matrix(DFBETA), MARGIN = 1, FUN = padroniza, M = M)
-    DCLOij <- apply(as.matrix(DFBETAOij), MARGIN = 1, FUN = padroniza, M = M)
-    std.error <- coef(object, std.error = TRUE, type = "beta")$Std.error
-    DFBETA_temp <- t(apply(DFBETA, MARGIN = 1, FUN = function(x, std.error) {
-        as.numeric(x/std.error)
-    }, std.error = std.error))
-    DFBETAOij_temp <- t(apply(DFBETAOij, MARGIN = 1, FUN = function(x, std.error) {
-        as.numeric(x/std.error)
-    }, std.error = std.error))
-    output_clust <- data.frame(Leverage = leverage_group, DFBETA = DFBETA_temp, Cook = DCLSi)
-    output_obs <- data.frame(Leverage = leverage_obs, DFBETA = DFBETAOij_temp, Cook = DCLOij)
-    output <- list(Id = output_clust, Observations = output_obs)
-    return(output)
+  return(DFBETAOij)
 }
+
+
+