Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
mcglm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Harbor Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
leg
mcglm
Commits
ef336b53
Commit
ef336b53
authored
9 years ago
by
wbonat
Browse files
Options
Downloads
Patches
Plain Diff
Fix all measures depedent on the H matrix
parent
59e71b95
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
R/mc_influence.R
+116
-83
116 additions, 83 deletions
R/mc_influence.R
with
116 additions
and
83 deletions
R/mc_influence.R
+
116
−
83
View file @
ef336b53
#' Influence measures
#'
@title
Influence measures
for McGLMs
#'
#' @description Compute influence measures for multivariate covariance generalized linear models.
#' Leverage, DFBETA and Cook's distance for unit sample and observations.
#' @description Compute influence measures for multivariate covariance
#' generalized linear models. Leverage, DFBETA and Cook's distance
#' for unit sample and observations.
#' @param object An object of \code{mcglm} class.
#' @param id a vector which identifies the clusters.
#' The length and order of id should match with the number of
#' observations.
#' @return A list with influence measures for cluster and observations.
#'
#' @param object An object of mcglm class.
#' @param id a vector which identifies the clusters. The length and order of id should be the
#' same as the number of observations. Data are assumed to be sorted so that observations on a cluster
#' are contiguous rows for all entities in the formula.
#' @return A matrix. Note that the function assumes that the data are in the correct order.
#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
#' @export
mc_influence
<-
function
(
object
,
id
)
{
temp_data
<-
data.frame
(
object
$
residuals
,
id
)
data_id
<-
split
(
temp_data
,
temp_data
$
id
)
n_id
<-
length
(
data_id
)
D
<-
bdiag
(
lapply
(
object
$
mu_list
,
function
(
x
)
x
$
D
))
n_beta
<-
dim
(
D
)[
2
]
R
<-
bdiag
(
lapply
(
data_id
,
function
(
x
)
{
tcrossprod
(
x
[,
1
])
}))
# Leverage for observations
inv_M
<-
-
object
$
inv_S_beta
M
<-
solve
(
inv_M
)
temp_data
<-
data.frame
(
res
=
object
$
residuals
,
id
)
temp_data_group
<-
split
(
temp_data
,
temp_data
$
id
)
D
<-
bdiag
(
lapply
(
object
$
mu_list
,
function
(
x
)
x
$
D
))
tD_invC
<-
t
(
D
)
%*%
object
$
inv_C
H
<-
D
%*%
inv_M
%*%
tD_invC
leverage_obs
<-
diag
(
H
)
leverage_group
<-
tapply
(
leverage_obs
,
id
,
sum
)
I
<-
Diagonal
(
object
$
n_obs
,
1
)
n_group
<-
length
(
temp_data_group
)
indexes
<-
matrix
(
NA
,
ncol
=
2
,
nrow
=
n_group
)
n_obs_group
<-
table
(
id
)
indexes
[
1
,
]
<-
c
(
1
,
as.numeric
(
n_obs_group
[
1
]))
DFBETA_clust
<-
list
()
D_temp
<-
D
[
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
]),
]
inv_C_temp
<-
object
$
inv_C
[
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
]),
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
])]
C_temp
<-
object
$
C
[
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
]),
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
])]
H_temp
<-
H
[
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
]),
c
(
indexes
[
1
,
1
]
:
indexes
[
1
,
2
])]
res_temp
<-
object
$
residuals
[
indexes
[
1
,
1
]
:
indexes
[
1
,
2
]]
DFBETAOij
<-
list
()
padroniza
<-
function
(
x
,
M
)
{
as.numeric
((
t
(
as.numeric
(
x
))
%*%
M
%*%
as.numeric
(
x
))
/
dim
(
M
)[
1
])
}
dfbetaOij
<-
function
(
D_temp
,
C_temp
,
inv_C_temp
,
res_temp
)
{
DFBETA_temp
<-
matrix
(
NA
,
ncol
=
dim
(
D_temp
)[
2
],
nrow
=
dim
(
D_temp
)[
1
])
k
=
1
for
(
j
in
1
:
length
(
res_temp
))
{
Dij
<-
D_temp
[
j
,
k
]
-
C_temp
[
k
,
-
j
]
%*%
inv_C_temp
[
-
k
,
-
j
]
%*%
D_temp
[
-
j
,
]
rij
<-
res_temp
[
j
]
-
C_temp
[
k
,
-
j
]
%*%
inv_C_temp
[
-
k
,
-
j
]
%*%
res_temp
[
-
j
]
Vij
<-
C_temp
[
k
,
j
]
-
C_temp
[
k
,
-
j
]
%*%
inv_C_temp
[
-
k
,
-
j
]
%*%
C_temp
[
-
j
,
k
]
Hij
<-
try
(
Dij
%*%
object
$
inv_S_beta
%*%
t
(
Dij
)
%*%
solve
(
Vij
),
silent
=
TRUE
)
if
(
class
(
Hij
)
==
"try-error"
)
{
DFBETA_temp
[
j
,
]
<-
NA
uni_id
<-
unique
(
id
)
Hi
<-
list
()
Di
<-
list
()
inv_Ci
<-
list
()
for
(
i
in
1
:
n_id
)
{
idTF
<-
id
==
uni_id
[
i
]
if
(
sum
(
idTF
)
==
1
)
{
Di
[[
i
]]
<-
Matrix
(
D
[
idTF
,],
nrow
=
sum
(
idTF
),
ncol
=
dim
(
D
)[
2
])
}
else
{
Di
[[
i
]]
<-
D
[
idTF
,]
}
if
(
class
(
Hij
)
!=
"try-error"
)
{
DFBETA_temp
[
j
,
]
<-
as.numeric
(
t
(
object
$
inv_S_beta
%*%
t
(
Di
j
)
%*%
(
rij
/
(
Vij
-
(
1
-
Hij
)))))
inv_Ci
[[
i
]]
<-
object
$
inv_C
[
idTF
,
idTF
]
Hi
[[
i
]]
<-
Di
[[
i
]]
%*%
inv_M
%*%
t
(
Di
[[
i
]])
%*%
inv_Ci
[[
i
]]
}
H
<-
bdiag
(
Hi
)
# Leverage for observations
Hobs
<-
diag
(
H
)
# Leverage for clusters
Hid
<-
tapply
(
Hobs
,
id
,
sum
)
# DFBETAs for clusters
I
<-
Diagonal
(
dim
(
temp_data
)[
1
],
1
)
inv_IH
<-
solve
(
I
-
H
)
DFBETACi
<-
list
()
DFBETAOij
<-
list
()
DCLSi
<-
c
()
for
(
i
in
1
:
n_id
)
{
idTF
<-
id
==
uni_id
[
i
]
if
(
sum
(
idTF
)
==
1
)
{
D_i
<-
Matrix
(
D
[
idTF
,],
nrow
=
sum
(
idTF
),
ncol
=
n_beta
)
inv_C_i
<-
Matrix
(
object
$
inv_C
[
idTF
,
idTF
],
ncol
=
1
,
nrow
=
1
)
}
else
{
D_i
<-
D
[
idTF
,]
inv_C_i
<-
object
$
inv_C
[
idTF
,
idTF
]
}
return
(
DFBETA_temp
)
inv_IH_i
=
inv_IH
[
idTF
,
idTF
]
r_i
<-
object
$
residuals
[
idTF
]
DFBETACi
[[
i
]]
<-
t
(
inv_M
%*%
t
(
D_i
)
%*%
inv_C_i
%*%
inv_IH_i
%*%
r_i
)
DCLSi
[
i
]
<-
as.numeric
(
DFBETACi
[[
i
]]
%*%
M
%*%
t
(
DFBETACi
[[
i
]]))
/
n_beta
if
(
dim
(
inv_C_i
)[
1
]
==
1
)
{
DFBETAOij
[[
i
]]
<-
DFBETACi
[[
i
]]
}
else
{
DFBETAOij
[[
i
]]
<-
mc_dfbetaOij
(
Di
=
D_i
,
Ci
=
object
$
C
[
idTF
,
idTF
],
inv_Ci
=
inv_C_i
,
ri
=
r_i
,
inv_M
=
inv_M
)
}
DFBETA_clust
[[
1
]]
<-
inv_M
%*%
t
(
D_temp
)
%*%
inv_C_temp
%*%
res_temp
DFBETAOij
[[
1
]]
<-
dfbetaOij
(
D_temp
,
C_temp
,
inv_C_temp
,
res_temp
)
for
(
i
in
2
:
n_group
)
{
indexes
[
i
,
]
<-
c
(
indexes
[
i
-
1
,
][
2
]
+
1
,
n_obs_group
[
i
]
+
indexes
[
i
-
1
,
][
2
])
D_temp
<-
D
[
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
]),
]
inv_C_temp
<-
object
$
inv_C
[
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
]),
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
])]
C_temp
<-
object
$
C
[
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
]),
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
])]
H_temp
<-
H
[
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
]),
c
(
indexes
[
i
,
1
]
:
indexes
[
i
,
2
])]
res_temp
<-
object
$
residuals
[
indexes
[
i
,
1
]
:
indexes
[
i
,
2
]]
D_temp
<-
matrix
(
D_temp
,
nrow
=
n_obs_group
[
i
])
DFBETA_clust
[[
i
]]
<-
inv_M
%*%
t
(
D_temp
)
%*%
inv_C_temp
%*%
res_temp
if
(
n_obs_group
[
i
]
==
1
)
{
DFBETAOij
[[
i
]]
<-
t
(
as.matrix
(
DFBETA_clust
[[
i
]]))
}
if
(
n_obs_group
[
i
]
!=
1
)
{
DFBETAOij
[[
i
]]
<-
dfbetaOij
(
D_temp
,
C_temp
,
inv_C_temp
,
res_temp
)
DFBETACi
<-
do.call
(
rbind
,
DFBETACi
)
DFBETAOij
<-
do.call
(
rbind
,
DFBETAOij
)
DCOij
=
do.call
(
rbind
,
apply
(
DFBETAOij
,
MARGIN
=
1
,
function
(
x
,
M
,
n_beta
)
t
(
x
)
%*%
M
%*%
x
/
n_beta
,
M
=
M
,
n_beta
=
n_beta
))
std.error
<-
coef
(
object
,
std.error
=
TRUE
)
$
Std.error
DFBETACi
=
data.frame
(
as.matrix
(
DFBETACi
))
/
std.error
DFBETAOij
=
data.frame
(
as.matrix
(
DFBETAOij
))
/
std.error
names
(
DFBETACi
)
<-
do.call
(
c
,
object
$
beta_names
)
names
(
DFBETAOij
)
<-
do.call
(
c
,
object
$
beta_names
)
output_id
<-
data.frame
(
DFBETACi
,
"Leverage"
=
as.numeric
(
Hid
),
"Cook"
=
as.numeric
(
DCLSi
))
output_obs
<-
data.frame
(
DFBETAOij
,
"Leverage"
=
as.numeric
(
Hobs
),
"Cook"
=
as.numeric
(
DCOij
))
output
<-
list
(
"Id"
=
output_id
,
"Obs"
=
output_obs
)
return
(
output
)
}
#' @title Influence measures for McGLMs
#'
#' @description Compute influence measures for multivariate covariance
#' generalized linear models. Leverage, DFBETA and Cook's distance
#' for observations. Auxiliar function for \code{mc_influence}.
#' @param Di D matrix for the cluster i.
#' @param Ci C matrix for the cluster i.
#' @param inv_Ci Inverse of C matrix for the cluster i.
#' @param ri Residual vector for the cluster i.
#' @param inv_M Inverse of variance/covariance of regression parameters.
#' @return Matrix with the DFBETA for observation in the cluster i.
#'
#' @author Wagner Hugo Bonat, \email{wbonat@@ufpr.br}
#' @export
mc_dfbetaOij
<-
function
(
Di
,
Ci
,
inv_Ci
,
ri
,
inv_M
)
{
nobs
<-
dim
(
inv_Ci
)[
1
]
nbeta
<-
dim
(
Di
)[
2
]
DFBETAOij
<-
matrix
(
NA
,
ncol
=
nbeta
,
nrow
=
nobs
)
for
(
i
in
1
:
nobs
)
{
Ci.jj
<-
Matrix
(
Ci
[
-
i
,
i
])
Cij.j
<-
t
(
Ci.jj
)
inv_Ci.j
<-
inv_Ci
[
-
i
,
-
i
]
Di.j
<-
Di
[
-
i
,]
Dij_temp
<-
Di
[
i
,]
-
Cij.j
%*%
inv_Ci.j
%*%
Di.j
rij_temp
<-
as.numeric
(
ri
[
i
]
-
Cij.j
%*%
inv_Ci.j
%*%
ri
[
-
i
])
Cij_temp
<-
as.numeric
(
Ci
[
i
,
i
]
-
Cij.j
%*%
inv_Ci.j
%*%
t
(
Cij.j
))
Hij_temp
<-
as.numeric
(
Dij_temp
%*%
inv_M
%*%
t
(
Dij_temp
)
%*%
solve
(
Cij_temp
))
part2
<-
Matrix
(
rij_temp
/
(
Cij_temp
*
(
1
-
Hij_temp
)))
DFBETAOij
[
i
,]
<-
as.numeric
(
t
(
inv_M
%*%
t
(
Dij_temp
)
%*%
part2
))
}
DFBETA
<-
lapply
(
DFBETA_clust
,
as.matrix
)
DFBETA
<-
lapply
(
DFBETA
,
t
)
DFBETA
<-
plyr
::
ldply
(
DFBETA
,
data.frame
)
names
(
DFBETA
)
<-
object
$
beta_names
[[
1
]]
DFBETAOij
<-
plyr
::
ldply
(
DFBETAOij
,
data.frame
)
names
(
DFBETAOij
)
<-
object
$
beta_names
[[
1
]]
DCLSi
<-
apply
(
as.matrix
(
DFBETA
),
MARGIN
=
1
,
FUN
=
padroniza
,
M
=
M
)
DCLOij
<-
apply
(
as.matrix
(
DFBETAOij
),
MARGIN
=
1
,
FUN
=
padroniza
,
M
=
M
)
std.error
<-
coef
(
object
,
std.error
=
TRUE
,
type
=
"beta"
)
$
Std.error
DFBETA_temp
<-
t
(
apply
(
DFBETA
,
MARGIN
=
1
,
FUN
=
function
(
x
,
std.error
)
{
as.numeric
(
x
/
std.error
)
},
std.error
=
std.error
))
DFBETAOij_temp
<-
t
(
apply
(
DFBETAOij
,
MARGIN
=
1
,
FUN
=
function
(
x
,
std.error
)
{
as.numeric
(
x
/
std.error
)
},
std.error
=
std.error
))
output_clust
<-
data.frame
(
Leverage
=
leverage_group
,
DFBETA
=
DFBETA_temp
,
Cook
=
DCLSi
)
output_obs
<-
data.frame
(
Leverage
=
leverage_obs
,
DFBETA
=
DFBETAOij_temp
,
Cook
=
DCLOij
)
output
<-
list
(
Id
=
output_clust
,
Observations
=
output_obs
)
return
(
output
)
return
(
DFBETAOij
)
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment