Skip to content
Snippets Groups Projects
Commit ef336b53 authored by wbonat's avatar wbonat
Browse files

Fix all measures depedent on the H matrix

parent 59e71b95
No related branches found
No related tags found
No related merge requests found
#' Influence measures
#' @title Influence measures for McGLMs
#'
#' @description Compute influence measures for multivariate covariance generalized linear models.
#' Leverage, DFBETA and Cook's distance for unit sample and observations.
#' @description Compute influence measures for multivariate covariance
#' generalized linear models. Leverage, DFBETA and Cook's distance
#' for unit sample and observations.
#' @param object An object of \code{mcglm} class.
#' @param id a vector which identifies the clusters.
#' The length and order of id should match with the number of
#' observations.
#' @return A list with influence measures for cluster and observations.
#'
#' @param object An object of mcglm class.
#' @param id a vector which identifies the clusters. The length and order of id should be the
#' same as the number of observations. Data are assumed to be sorted so that observations on a cluster
#' are contiguous rows for all entities in the formula.
#' @return A matrix. Note that the function assumes that the data are in the correct order.
#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
#' @export
mc_influence <- function(object, id) {
temp_data <- data.frame(object$residuals, id)
data_id <- split(temp_data, temp_data$id)
n_id <- length(data_id)
D <- bdiag(lapply(object$mu_list, function(x) x$D))
n_beta <- dim(D)[2]
R <- bdiag(lapply(data_id, function(x) {
tcrossprod(x[, 1])
}))
# Leverage for observations
inv_M <- -object$inv_S_beta
M <- solve(inv_M)
temp_data <- data.frame(res = object$residuals, id)
temp_data_group <- split(temp_data, temp_data$id)
D <- bdiag(lapply(object$mu_list, function(x) x$D))
tD_invC <- t(D) %*% object$inv_C
H <- D %*% inv_M %*% tD_invC
leverage_obs <- diag(H)
leverage_group <- tapply(leverage_obs, id, sum)
I <- Diagonal(object$n_obs, 1)
n_group <- length(temp_data_group)
indexes <- matrix(NA, ncol = 2, nrow = n_group)
n_obs_group <- table(id)
indexes[1, ] <- c(1, as.numeric(n_obs_group[1]))
DFBETA_clust <- list()
D_temp <- D[c(indexes[1, 1]:indexes[1, 2]), ]
inv_C_temp <- object$inv_C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
C_temp <- object$C[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
H_temp <- H[c(indexes[1, 1]:indexes[1, 2]), c(indexes[1, 1]:indexes[1, 2])]
res_temp <- object$residuals[indexes[1, 1]:indexes[1, 2]]
DFBETAOij <- list()
padroniza <- function(x, M) {
as.numeric((t(as.numeric(x)) %*% M %*% as.numeric(x))/dim(M)[1])
}
dfbetaOij <- function(D_temp, C_temp, inv_C_temp, res_temp) {
DFBETA_temp <- matrix(NA, ncol = dim(D_temp)[2], nrow = dim(D_temp)[1])
k = 1
for (j in 1:length(res_temp)) {
Dij <- D_temp[j, k] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% D_temp[-j, ]
rij <- res_temp[j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% res_temp[-j]
Vij <- C_temp[k, j] - C_temp[k, -j] %*% inv_C_temp[-k, -j] %*% C_temp[-j, k]
Hij <- try(Dij %*% object$inv_S_beta %*% t(Dij) %*% solve(Vij), silent = TRUE)
if (class(Hij) == "try-error") {
DFBETA_temp[j, ] <- NA
uni_id <- unique(id)
Hi <- list()
Di <- list()
inv_Ci <- list()
for (i in 1:n_id) {
idTF <- id == uni_id[i]
if(sum(idTF) == 1) {
Di[[i]] <- Matrix(D[idTF,], nrow = sum(idTF), ncol = dim(D)[2])
} else {
Di[[i]] <- D[idTF,]
}
if (class(Hij) != "try-error") {
DFBETA_temp[j, ] <- as.numeric(t(object$inv_S_beta %*% t(Dij) %*% (rij/(Vij - (1 - Hij)))))
inv_Ci[[i]] <- object$inv_C[idTF,idTF]
Hi[[i]] <- Di[[i]]%*%inv_M%*%t(Di[[i]])%*%inv_Ci[[i]]
}
H <- bdiag(Hi)
# Leverage for observations
Hobs <- diag(H)
# Leverage for clusters
Hid <- tapply(Hobs, id, sum)
# DFBETAs for clusters
I <- Diagonal(dim(temp_data)[1], 1)
inv_IH <- solve(I - H)
DFBETACi <- list()
DFBETAOij <- list()
DCLSi <- c()
for(i in 1:n_id) {
idTF <- id == uni_id[i]
if(sum(idTF) == 1) {
D_i <- Matrix(D[idTF,], nrow = sum(idTF), ncol = n_beta)
inv_C_i <- Matrix(object$inv_C[idTF,idTF],ncol = 1, nrow = 1)
} else {
D_i <- D[idTF,]
inv_C_i <- object$inv_C[idTF,idTF]
}
return(DFBETA_temp)
inv_IH_i = inv_IH[idTF,idTF]
r_i <- object$residuals[idTF]
DFBETACi[[i]] <- t(inv_M%*%t(D_i)%*%inv_C_i%*%inv_IH_i%*%r_i)
DCLSi[i] <- as.numeric(DFBETACi[[i]]%*%M%*%t(DFBETACi[[i]]))/n_beta
if(dim(inv_C_i)[1] == 1) {DFBETAOij[[i]] <- DFBETACi[[i]]
} else {
DFBETAOij[[i]] <- mc_dfbetaOij(Di = D_i, Ci = object$C[idTF,idTF],
inv_Ci = inv_C_i, ri = r_i,
inv_M = inv_M)
}
DFBETA_clust[[1]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp
DFBETAOij[[1]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp)
for (i in 2:n_group) {
indexes[i, ] <- c(indexes[i - 1, ][2] + 1, n_obs_group[i] + indexes[i - 1, ][2])
D_temp <- D[c(indexes[i, 1]:indexes[i, 2]), ]
inv_C_temp <- object$inv_C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
C_temp <- object$C[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
H_temp <- H[c(indexes[i, 1]:indexes[i, 2]), c(indexes[i, 1]:indexes[i, 2])]
res_temp <- object$residuals[indexes[i, 1]:indexes[i, 2]]
D_temp <- matrix(D_temp, nrow = n_obs_group[i])
DFBETA_clust[[i]] <- inv_M %*% t(D_temp) %*% inv_C_temp %*% res_temp
if (n_obs_group[i] == 1) {
DFBETAOij[[i]] <- t(as.matrix(DFBETA_clust[[i]]))
}
if (n_obs_group[i] != 1) {
DFBETAOij[[i]] <- dfbetaOij(D_temp, C_temp, inv_C_temp, res_temp)
DFBETACi <- do.call(rbind, DFBETACi)
DFBETAOij <- do.call(rbind, DFBETAOij)
DCOij = do.call(rbind, apply(DFBETAOij, MARGIN = 1,
function(x,M,n_beta)t(x)%*%M%*%x/n_beta,
M = M, n_beta = n_beta))
std.error <- coef(object, std.error = TRUE)$Std.error
DFBETACi = data.frame(as.matrix(DFBETACi))/std.error
DFBETAOij = data.frame(as.matrix(DFBETAOij))/std.error
names(DFBETACi) <- do.call(c, object$beta_names)
names(DFBETAOij) <- do.call(c, object$beta_names)
output_id <- data.frame(DFBETACi, "Leverage" = as.numeric(Hid),
"Cook" = as.numeric(DCLSi))
output_obs <- data.frame(DFBETAOij, "Leverage" = as.numeric(Hobs),
"Cook" = as.numeric(DCOij))
output <- list("Id" = output_id, "Obs" = output_obs)
return(output)
}
#' @title Influence measures for McGLMs
#'
#' @description Compute influence measures for multivariate covariance
#' generalized linear models. Leverage, DFBETA and Cook's distance
#' for observations. Auxiliar function for \code{mc_influence}.
#' @param Di D matrix for the cluster i.
#' @param Ci C matrix for the cluster i.
#' @param inv_Ci Inverse of C matrix for the cluster i.
#' @param ri Residual vector for the cluster i.
#' @param inv_M Inverse of variance/covariance of regression parameters.
#' @return Matrix with the DFBETA for observation in the cluster i.
#'
#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
#' @export
mc_dfbetaOij <- function(Di, Ci, inv_Ci, ri, inv_M) {
nobs <- dim(inv_Ci)[1]
nbeta <- dim(Di)[2]
DFBETAOij <- matrix(NA, ncol = nbeta, nrow = nobs)
for (i in 1:nobs) {
Ci.jj <- Matrix(Ci[-i,i])
Cij.j <- t(Ci.jj)
inv_Ci.j <- inv_Ci[-i,-i]
Di.j <- Di[-i,]
Dij_temp <- Di[i,] - Cij.j%*%inv_Ci.j%*%Di.j
rij_temp <- as.numeric(ri[i] - Cij.j%*%inv_Ci.j%*%ri[-i])
Cij_temp <- as.numeric(Ci[i,i] - Cij.j%*%inv_Ci.j%*%t(Cij.j))
Hij_temp <- as.numeric(Dij_temp%*%inv_M%*%t(Dij_temp)%*%solve(Cij_temp))
part2 <- Matrix(rij_temp/(Cij_temp*(1-Hij_temp)))
DFBETAOij[i,] <- as.numeric(t(inv_M%*%t(Dij_temp)%*%part2))
}
DFBETA <- lapply(DFBETA_clust, as.matrix)
DFBETA <- lapply(DFBETA, t)
DFBETA <- plyr::ldply(DFBETA, data.frame)
names(DFBETA) <- object$beta_names[[1]]
DFBETAOij <- plyr::ldply(DFBETAOij, data.frame)
names(DFBETAOij) <- object$beta_names[[1]]
DCLSi <- apply(as.matrix(DFBETA), MARGIN = 1, FUN = padroniza, M = M)
DCLOij <- apply(as.matrix(DFBETAOij), MARGIN = 1, FUN = padroniza, M = M)
std.error <- coef(object, std.error = TRUE, type = "beta")$Std.error
DFBETA_temp <- t(apply(DFBETA, MARGIN = 1, FUN = function(x, std.error) {
as.numeric(x/std.error)
}, std.error = std.error))
DFBETAOij_temp <- t(apply(DFBETAOij, MARGIN = 1, FUN = function(x, std.error) {
as.numeric(x/std.error)
}, std.error = std.error))
output_clust <- data.frame(Leverage = leverage_group, DFBETA = DFBETA_temp, Cook = DCLSi)
output_obs <- data.frame(Leverage = leverage_obs, DFBETA = DFBETAOij_temp, Cook = DCLOij)
output <- list(Id = output_clust, Observations = output_obs)
return(output)
return(DFBETAOij)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment