diff --git a/R/mc_influence.R b/R/mc_influence.R index 51e71ba81b3dd42a6438f44ec2f9eedc29a71f4d..437c6b52d53be45b07f54900b40f926b973cca32 100644 --- a/R/mc_influence.R +++ b/R/mc_influence.R @@ -1,92 +1,125 @@ -#' Influence measures +#' @title Influence measures for McGLMs #' -#' @description Compute influence measures for multivariate covariance generalized linear models. -#' Leverage, DFBETA and Cook's distance for unit sample and observations. +#' @description Compute influence measures for multivariate covariance +#' generalized linear models. Leverage, DFBETA and Cook's distance +#' for unit sample and observations. +#' @param object An object of \code{mcglm} class. +#' @param id a vector which identifies the clusters. +#' The length and order of id should match with the number of +#' observations. +#' @return A list with influence measures for cluster and observations. #' -#' @param object An object of mcglm class. -#' @param id a vector which identifies the clusters. The length and order of id should be the -#' same as the number of observations. Data are assumed to be sorted so that observations on a cluster -#' are contiguous rows for all entities in the formula. -#' @return A matrix. Note that the function assumes that the data are in the correct order. +#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br} #' @export mc_influence <- function(object, id) { - inv_M <- -object$inv_S_beta - M <- solve(inv_M) - temp_data <- data.frame(res = object$residuals, id) - temp_data_group <- split(temp_data, temp_data$id) - D <- bdiag(lapply(object$mu_list, function(x) x$D)) - tD_invC <- t(D) %*% object$inv_C - H <- D %*% inv_M %*% tD_invC - leverage_obs <- diag(H) - leverage_group <- tapply(leverage_obs, id, sum) - I <- Diagonal(object$n_obs, 1) - n_group <- length(temp_data_group) - indexes <- matrix(NA, ncol = 2, nrow = n_group) - n_obs_group <- table(id) - indexes[1, ] <- c(1, as.numeric(n_obs_group[1])) - DFBETA_clust <- list() - D_temp <- D[c(indexes[1, 1]:indexes[1, 2]), ] - inv_C_temp <- object$inv_C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])] - C_temp <- object$C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])] - H_temp <- H[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])] - res_temp <- object$residuals[indexes[1, 1]:indexes[1, 2]] - DFBETAOij <- list() - padroniza <- function(x, M) { - as.numeric((t(as.numeric(x)) %*% M %*% as.numeric(x))/dim(M)[1]) + temp_data <- data.frame(object$residuals, id) + data_id <- split(temp_data, temp_data$id) + n_id <- length(data_id) + D <- bdiag(lapply(object$mu_list, function(x) x$D)) + n_beta <- dim(D)[2] + R <- bdiag(lapply(data_id, function(x) { + tcrossprod(x[, 1]) + })) + # Leverage for observations + inv_M <- -object$inv_S_beta + M <- solve(inv_M) + uni_id <- unique(id) + Hi <- list() + Di <- list() + inv_Ci <- list() + for (i in 1:n_id) { + idTF <- id == uni_id[i] + if(sum(idTF) == 1) { + Di[[i]] <- Matrix(D[idTF,], nrow = sum(idTF), ncol = dim(D)[2]) + } else { + Di[[i]] <- D[idTF,] } - dfbetaOij <- function(D_temp, C_temp, inv_C_temp, res_temp) { - DFBETA_temp <- matrix(NA, ncol = dim(D_temp)[2], nrow = dim(D_temp)[1]) - k = 1 - for (j in 1:length(res_temp)) { - Dij <- D_temp[j, k] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% D_temp[-j, ] - rij <- res_temp[j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% res_temp[-j] - Vij <- C_temp[k, j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% C_temp[-j, k] - Hij <- try(Dij %*% object$inv_S_beta %*% t(Dij) %*% solve(Vij), silent = TRUE) - if (class(Hij) == "try-error") { - DFBETA_temp[j, ] <- NA - } - if (class(Hij) != "try-error") { - DFBETA_temp[j, ] <- as.numeric(t(object$inv_S_beta %*% t(Dij) %*% (rij/(Vij - (1 - Hij))))) - } - } - return(DFBETA_temp) + inv_Ci[[i]] <- object$inv_C[idTF,idTF] + Hi[[i]] <- Di[[i]]%*%inv_M%*%t(Di[[i]])%*%inv_Ci[[i]] + } + H <- bdiag(Hi) + # Leverage for observations + Hobs <- diag(H) + # Leverage for clusters + Hid <- tapply(Hobs, id, sum) + # DFBETAs for clusters + I <- Diagonal(dim(temp_data)[1], 1) + inv_IH <- solve(I - H) + DFBETACi <- list() + DFBETAOij <- list() + DCLSi <- c() + for(i in 1:n_id) { + idTF <- id == uni_id[i] + if(sum(idTF) == 1) { + D_i <- Matrix(D[idTF,], nrow = sum(idTF), ncol = n_beta) + inv_C_i <- Matrix(object$inv_C[idTF,idTF],ncol = 1, nrow = 1) + } else { + D_i <- D[idTF,] + inv_C_i <- object$inv_C[idTF,idTF] } - DFBETA_clust[[1]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp - DFBETAOij[[1]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp) - for (i in 2:n_group) { - indexes[i, ] <- c(indexes[i - 1, ][2] + 1, n_obs_group[i] + indexes[i - 1, ][2]) - D_temp <- D[c(indexes[i, 1]:indexes[i, 2]), ] - inv_C_temp <- object$inv_C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])] - C_temp <- object$C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])] - H_temp <- H[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])] - res_temp <- object$residuals[indexes[i, 1]:indexes[i, 2]] - D_temp <- matrix(D_temp, nrow = n_obs_group[i]) - DFBETA_clust[[i]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp - if (n_obs_group[i] == 1) { - DFBETAOij[[i]] <- t(as.matrix(DFBETA_clust[[i]])) - } - if (n_obs_group[i] != 1) { - DFBETAOij[[i]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp) - } + inv_IH_i = inv_IH[idTF,idTF] + r_i <- object$residuals[idTF] + DFBETACi[[i]] <- t(inv_M%*%t(D_i)%*%inv_C_i%*%inv_IH_i%*%r_i) + DCLSi[i] <- as.numeric(DFBETACi[[i]]%*%M%*%t(DFBETACi[[i]]))/n_beta + if(dim(inv_C_i)[1] == 1) {DFBETAOij[[i]] <- DFBETACi[[i]] + } else { + DFBETAOij[[i]] <- mc_dfbetaOij(Di = D_i, Ci = object$C[idTF,idTF], + inv_Ci = inv_C_i, ri = r_i, + inv_M = inv_M) } - DFBETA <- lapply(DFBETA_clust, as.matrix) - DFBETA <- lapply(DFBETA, t) - DFBETA <- plyr::ldply(DFBETA, data.frame) - names(DFBETA) <- object$beta_names[[1]] - DFBETAOij <- plyr::ldply(DFBETAOij, data.frame) - names(DFBETAOij) <- object$beta_names[[1]] - DCLSi <- apply(as.matrix(DFBETA), MARGIN = 1, FUN = padroniza, M = M) - DCLOij <- apply(as.matrix(DFBETAOij), MARGIN = 1, FUN = padroniza, M = M) - std.error <- coef(object, std.error = TRUE, type = "beta")$Std.error - DFBETA_temp <- t(apply(DFBETA, MARGIN = 1, FUN = function(x, std.error) { - as.numeric(x/std.error) - }, std.error = std.error)) - DFBETAOij_temp <- t(apply(DFBETAOij, MARGIN = 1, FUN = function(x, std.error) { - as.numeric(x/std.error) - }, std.error = std.error)) - output_clust <- data.frame(Leverage = leverage_group, DFBETA = DFBETA_temp, Cook = DCLSi) - output_obs <- data.frame(Leverage = leverage_obs, DFBETA = DFBETAOij_temp, Cook = DCLOij) - output <- list(Id = output_clust, Observations = output_obs) - return(output) + } + DFBETACi <- do.call(rbind, DFBETACi) + DFBETAOij <- do.call(rbind, DFBETAOij) + DCOij = do.call(rbind, apply(DFBETAOij, MARGIN = 1, + function(x,M,n_beta)t(x)%*%M%*%x/n_beta, + M = M, n_beta = n_beta)) + std.error <- coef(object, std.error = TRUE)$Std.error + DFBETACi = data.frame(as.matrix(DFBETACi))/std.error + DFBETAOij = data.frame(as.matrix(DFBETAOij))/std.error + names(DFBETACi) <- do.call(c, object$beta_names) + names(DFBETAOij) <- do.call(c, object$beta_names) + output_id <- data.frame(DFBETACi, "Leverage" = as.numeric(Hid), + "Cook" = as.numeric(DCLSi)) + output_obs <- data.frame(DFBETAOij, "Leverage" = as.numeric(Hobs), + "Cook" = as.numeric(DCOij)) + output <- list("Id" = output_id, "Obs" = output_obs) + return(output) } + +#' @title Influence measures for McGLMs +#' +#' @description Compute influence measures for multivariate covariance +#' generalized linear models. Leverage, DFBETA and Cook's distance +#' for observations. Auxiliar function for \code{mc_influence}. +#' @param Di D matrix for the cluster i. +#' @param Ci C matrix for the cluster i. +#' @param inv_Ci Inverse of C matrix for the cluster i. +#' @param ri Residual vector for the cluster i. +#' @param inv_M Inverse of variance/covariance of regression parameters. +#' @return Matrix with the DFBETA for observation in the cluster i. +#' +#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br} +#' @export + +mc_dfbetaOij <- function(Di, Ci, inv_Ci, ri, inv_M) { + nobs <- dim(inv_Ci)[1] + nbeta <- dim(Di)[2] + DFBETAOij <- matrix(NA, ncol = nbeta, nrow = nobs) + for (i in 1:nobs) { + Ci.jj <- Matrix(Ci[-i,i]) + Cij.j <- t(Ci.jj) + inv_Ci.j <- inv_Ci[-i,-i] + Di.j <- Di[-i,] + Dij_temp <- Di[i,] - Cij.j%*%inv_Ci.j%*%Di.j + rij_temp <- as.numeric(ri[i] - Cij.j%*%inv_Ci.j%*%ri[-i]) + Cij_temp <- as.numeric(Ci[i,i] - Cij.j%*%inv_Ci.j%*%t(Cij.j)) + Hij_temp <- as.numeric(Dij_temp%*%inv_M%*%t(Dij_temp)%*%solve(Cij_temp)) + part2 <- Matrix(rij_temp/(Cij_temp*(1-Hij_temp))) + DFBETAOij[i,] <- as.numeric(t(inv_M%*%t(Dij_temp)%*%part2)) + } + return(DFBETAOij) +} + + +