|
Modelos de Regressão para Dados de Contagem com o R
|
Walmes M. Zeviani, Eduardo E. Ribeiro Jr & Cesar A. Taconeli
# Definições da sessão.
# devtools::load_all("../")
library(lattice)
library(latticeExtra)
library(bbmle)
library(corrplot)
library(plyr)
library(car)
library(doBy)
library(multcomp)
library(MRDCr)
Se uma variável aleatória \(Y\) tem distribuição de probabilidades Poisson generalizada, então sua função de probabilidade, na parametrização de média, é
\[ f(y) = \left( \dfrac{\lambda}{1+\alpha\lambda} \right)^{y} \frac{(1+\alpha y)^{y-1}}{y!} \exp\left\{-\lambda \frac{(1+\alpha y)}{(1+\alpha \lambda)}\right\}, \] em que \(\lambda\) é a média da distribuição e \(\alpha\) é a o parâmetro de dispersão. Nessa parametrização, tem-se
A função de log-verossimilhança é \[ \ell(y; \lambda, \alpha) = \sum_{i=1}^{n} y_{i}\ln(\lambda)- \ln(1+\alpha\lambda)+ (y_{i}-1)\ln(1+\alpha y)- \lambda\frac{(1+\alpha y_{i})}{(1+\alpha\lambda)}- \ln(y_{i}!). \]
grid <- expand.grid(lambda = c(2, 8, 15),
alpha = c(-0.05, 0, 0.05))
y <- 0:35
py <- mapply(FUN = dpgnz,
lambda = grid$lambda,
alpha = grid$alpha,
MoreArgs = list(y = y), SIMPLIFY = FALSE)
grid <- cbind(grid[rep(1:nrow(grid), each = length(y)), ],
y = y,
py = unlist(py))
useOuterStrips(xyplot(py ~ y | factor(lambda) + factor(alpha),
ylab = expression(f(y)),
xlab = expression(y),
data = grid, type = "h",
panel = function(x, y, ...) {
m <- sum(x * y)
panel.xyplot(x, y, ...)
panel.abline(v = m, lty = 2)
}),
strip = strip.custom(
strip.names = TRUE,
var.name = expression(lambda == ""),
sep = ""),
strip.left = strip.custom(
strip.names = TRUE,
var.name = expression(alpha == ""),
sep = ""))
#-----------------------------------------------------------------------
# Relação média variância.
curve(lambda * (1 + 0 * lambda)^2,
from = 0, to = 10, xname = "lambda",
ylab = expression(lambda %.% (1 + alpha %.% lambda)^2),
xlab = expression(lambda))
alpha <- seq(-0.25, 0.25, by = 0.025)
col <- brewer.pal(n = 5, name = "Spectral")
col <- colorRampPalette(colors = col)(length(alpha))
for (a in seq_along(alpha)) {
curve(lambda * (1 + alpha[a] * lambda)^2,
add = TRUE, xname = "lambda", col = col[a], lwd = 2)
}
#-----------------------------------------------------------------------
# Gráfico do espaço paramétrico de lambda x alpha.
y <- 0:400
fun <- Vectorize(vectorize.args = c("lambda", "alpha"),
FUN = function(lambda, alpha) {
sum(dpgnz(y = y, lambda = lambda, alpha = alpha))
})
grid <- list(lambda = seq(0.2, 50, by = 0.2),
alpha = seq(-0.98, 0.98, by = 0.02))
grid$sum <- with(grid, outer(lambda, alpha, fun))
grid <- with(grid,
cbind(expand.grid(lambda = lambda, alpha = alpha),
data.frame(sum = c(sum))))
levelplot(sum ~ lambda + alpha,
data = subset(grid, round(sum, 3) == 1),
col.regions = gray.colors) +
layer(panel.abline(h = 0, lty = 2)) +
layer(panel.curve(-1/x))
# Já que lambda * alpha > -1, então alpha = -1/lambda dá a fronteira.
# Função de log-Verossimilhança da Poisson Generalizada na
# parametrização de modelo de regressão.
MRDCr::llpgnz
## function(params, y, X, offset = NULL) {
## # params: vetor de parâmetros;
## # params[1]: parâmetro de dispersão (alpha);
## # params[-1]: parâmetro de locação (lambda);
## # y: variável resposta (contagem);
## # X: matriz do modelo linear;
## # offset: tamanho do domínio onde y foi medido (exposição);
## #----------------------------------------
## if (is.null(offset)) {
## offset <- 1L
## }
## alpha <- params[1]
## lambda <- offset * exp(X %*% params[-1])
## z <- 1 + alpha * lambda
## w <- 1 + alpha * y
## ll <- y * (log(lambda) - log(z)) +
## (y - 1) * log(w) -
## lambda * (w/z) -
## lfactorial(y)
## # Negativo da log-likelihood.
## -sum(ll)
## }
## <environment: namespace:MRDCr>
#-----------------------------------------------------------------------
# Gerando uma amostra aleatória da Poisson, para teste.
# Offset = 2, lambda = 3.
y <- rpois(100, lambda = 2 * 3)
L <- list(y = y,
offset = rep(2, length(y)),
X = cbind(rep(1, length(y))))
start <- c(alpha = 0, lambda = 1)
parnames(llpgnz) <- names(start)
# Como \alpha foi fixado em 1, essa ll corresponde à Poisson.
n0 <- mle2(minuslogl = llpgnz,
start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Para conferir.
c(coef(n0)["lambda"],
coef(glm(y ~ offset(log(L$offset)), family = poisson)))
## lambda (Intercept)
## 1.0936 1.0936
# Estimando o \alpha.
n1 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
coef(n1)
## alpha lambda
## -0.006487516 1.093599719
# Perfil de verossimilhança dos parâmetros.
plot(profile(n1))
# Covariância.
V <- cov2cor(vcov(n1))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
Resultados de um experimento fatorial (3 x 5), em delineamento de blocos casualizados, que estudou a influência de níveis de potássio na adubação de soja em combinação com irrigação em casa de vegetação. As variáveis de contagem registradas nesse experimento foram o número de vagens viáveis (e não viáveis) e o número total de sementes por parcela com duas plantas de soja.
#-----------------------------------------------------------------------
# Carregando e explorando os dados.
data(soja, package = "MRDCr")
str(soja)
## 'data.frame': 75 obs. of 5 variables:
## $ K : int 0 30 60 120 180 0 30 60 120 180 ...
## $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 2 2 2 2 2 ...
## $ bloc: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ngra: int 136 159 156 171 190 140 193 200 208 237 ...
## $ nvag: int 56 62 66 68 82 63 86 94 86 97 ...
# A observação 74 é um outlier.
soja <- soja[-74, ]
xyplot(nvag ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
ylab = "Número de vagens por parcela",
xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
soja <- transform(soja, K = factor(K))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(nvag ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nvag
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 322.68
## bloc 4 14.284 69 308.40 2.9785 0.02684 *
## umid 2 92.911 67 215.49 38.7485 3.157e-11 ***
## K 4 136.060 63 79.43 28.3719 8.316e-13 ***
## umid:K 8 14.150 55 65.28 1.4754 0.18750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = nvag ~ bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9681 -0.6393 -0.0195 0.4854 2.3306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.95369 0.07547 52.389 < 2e-16 ***
## blocII -0.02928 0.04479 -0.654 0.516036
## blocIII -0.07265 0.04529 -1.604 0.114420
## blocIV -0.12544 0.04592 -2.732 0.008457 **
## blocV -0.10795 0.04705 -2.294 0.025606 *
## umid50 0.13404 0.09597 1.397 0.168117
## umid62,5 0.21656 0.09418 2.299 0.025303 *
## K30 0.27427 0.09300 2.949 0.004670 **
## K60 0.30797 0.09233 3.336 0.001530 **
## K120 0.32883 0.09192 3.577 0.000734 ***
## K180 0.25540 0.09338 2.735 0.008376 **
## umid50:K30 0.06322 0.12654 0.500 0.619324
## umid62,5:K30 -0.10747 0.12631 -0.851 0.398532
## umid50:K60 0.16561 0.12449 1.330 0.188897
## umid62,5:K60 0.10735 0.12286 0.874 0.386028
## umid50:K120 0.14920 0.12414 1.202 0.234563
## umid62,5:K120 0.11839 0.12487 0.948 0.347229
## umid50:K180 0.30370 0.12439 2.441 0.017873 *
## umid62,5:K180 0.19838 0.12325 1.610 0.113208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.198902)
##
## Null deviance: 322.684 on 73 degrees of freedom
## Residual deviance: 65.278 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = nvag, offset = 1, X = model.matrix(m0)))
# Usa as estimativas do Poisson como valores iniciais para a PGen.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -259.6165 -259.6165
cbind(coef(m2)[-1], coef(m0))
## [,1] [,2]
## (Intercept) 3.95368723 3.95368724
## blocII -0.02927855 -0.02927854
## blocIII -0.07265048 -0.07265048
## blocIV -0.12543798 -0.12543798
## blocV -0.10794857 -0.10794857
## umid50 0.13404356 0.13404356
## umid62,5 0.21656458 0.21656458
## K30 0.27427290 0.27427290
## K60 0.30796674 0.30796674
## K120 0.32883188 0.32883188
## K180 0.25540441 0.25540441
## umid50:K30 0.06322288 0.06322288
## umid62,5:K30 -0.10747272 -0.10747272
## umid50:K60 0.16561471 0.16561471
## umid62,5:K60 0.10735066 0.10735066
## umid50:K120 0.14920392 0.14920392
## umid62,5:K120 0.11838578 0.11838578
## umid50:K180 0.30369921 0.30369921
## umid62,5:K180 0.19837927 0.19837927
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180+
## umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
## umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
## umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
## umid62,5:K120+umid50:K180+umid62,5:K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 518.19
## 2 19 519.23 1.0402 1 0.3078
# Estimativas dos coeficientes.
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 -0.001042601
## (Intercept) 3.95368724 3.95368723 3.953181451
## blocII -0.02927854 -0.02927855 -0.027923784
## blocIII -0.07265048 -0.07265048 -0.072056920
## blocIV -0.12543798 -0.12543798 -0.127982907
## blocV -0.10794857 -0.10794857 -0.105466353
## umid50 0.13404356 0.13404356 0.134139707
## umid62,5 0.21656458 0.21656458 0.216194659
## K30 0.27427290 0.27427290 0.273827867
## K60 0.30796674 0.30796674 0.307818797
## K120 0.32883188 0.32883188 0.328229190
## K180 0.25540441 0.25540441 0.255998709
## umid50:K30 0.06322288 0.06322288 0.064744844
## umid62,5:K30 -0.10747272 -0.10747272 -0.106284339
## umid50:K60 0.16561471 0.16561471 0.166265663
## umid62,5:K60 0.10735066 0.10735066 0.108202657
## umid50:K120 0.14920392 0.14920392 0.149064097
## umid62,5:K120 0.11838578 0.11838578 0.120194391
## umid50:K180 0.30369921 0.30369921 0.302905099
## umid62,5:K180 0.19837927 0.19837927 0.198624027
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.30139729 0.01586302 0.06988971
#-----------------------------------------------------------------------
# Testes de hipótese.
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cálculo da estatística Chi-quadrado.
# t(L %*% coef(m3)) %*%
# solve(L %*% vcov(m3) %*% t(L)) %*%
# (L %*% coef(m3))
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 16.21861
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário passar um objeto glm mesmo fornecendo o restante a parte.
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
##
## Model 1: restricted model
## Model 2: nvag ~ bloc + umid * K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 63
## 2 55 8 16.219 0.03936 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
X <- LSmatrix(m0, effect = c("umid", "K"))
pred <- attr(X, "grid")
pred <- transform(pred,
K = as.integer(K),
umid = factor(umid))
pred <- list(pois = pred, pgen = pred)
# Preditos pela Poisson.
# aux <- predict(m0, newdata = pred$pois, se.fit = TRUE)
# aux <- exp(aux$fit + outer(aux$se.fit, qn, FUN = "*"))
# pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
str(pred$pois)
## 'data.frame': 15 obs. of 5 variables:
## $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 2 3 1 2 3 1 2 3 1 ...
## $ K : int 0 0 0 30 30 30 60 60 60 120 ...
## $ fit : num 48.7 55.7 60.5 64.1 78.1 ...
## $ lwr : num 43 49.6 54.1 57.5 70.7 ...
## $ upr : num 55.3 62.7 67.7 71.5 86.3 ...
# Predito para a Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.1),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de vagens por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
Análise do número de grãos por pacela do experimento com soja.
#-----------------------------------------------------------------------
xyplot(ngra ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
ylab = "Número de grãos por parcela",
xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ngra ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 73 761.43
## bloc 4 28.34 69 733.09 0.01471 *
## umid 2 185.06 67 548.03 < 2e-16 ***
## K 4 380.32 63 167.71 < 2e-16 ***
## umid:K 8 42.99 55 124.72 0.01606 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ngra ~ bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6297 -1.0707 -0.0873 0.7428 3.2810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.80196 0.06773 70.898 < 2e-16 ***
## blocII -0.01939 0.04017 -0.483 0.631230
## blocIII -0.03663 0.04035 -0.908 0.367964
## blocIV -0.10559 0.04107 -2.571 0.012884 *
## blocV -0.09313 0.04218 -2.208 0.031433 *
## umid50 0.13245 0.08611 1.538 0.129739
## umid62,5 0.18548 0.08506 2.180 0.033517 *
## K30 0.29799 0.08299 3.591 0.000703 ***
## K60 0.34434 0.08218 4.190 0.000102 ***
## K120 0.36493 0.08183 4.459 4.1e-05 ***
## K180 0.29542 0.08303 3.558 0.000778 ***
## umid50:K30 0.04344 0.11318 0.384 0.702600
## umid62,5:K30 -0.13669 0.11386 -1.201 0.235081
## umid50:K60 0.11559 0.11136 1.038 0.303817
## umid62,5:K60 0.09174 0.11027 0.832 0.409039
## umid50:K120 0.11860 0.11088 1.070 0.289461
## umid62,5:K120 0.16266 0.11145 1.460 0.150096
## umid50:K180 0.28832 0.11085 2.601 0.011917 *
## umid62,5:K180 0.21569 0.11021 1.957 0.055430 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 2.28854)
##
## Null deviance: 761.43 on 73 degrees of freedom
## Residual deviance: 124.72 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = ngra, offset = 1, X = model.matrix(m0)))
# Usa as estimativas do Poisson como valores iniciais.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -321.6692 -321.6692
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180+
## umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
## umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
## umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
## umid62,5:K120+umid50:K180+umid62,5:K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 630.79
## 2 19 643.34 12.544 1 0.0003975 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimaitvas dos parâmetros.
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 0.001778618
## (Intercept) 4.80195973 4.80195972 4.802081183
## blocII -0.01939070 -0.01939070 -0.022097221
## blocIII -0.03662632 -0.03662632 -0.037831855
## blocIV -0.10559332 -0.10559332 -0.099023604
## blocV -0.09313375 -0.09313375 -0.098406090
## umid50 0.13245136 0.13245136 0.133402155
## umid62,5 0.18548293 0.18548293 0.187347770
## K30 0.29799144 0.29799144 0.299365942
## K60 0.34433662 0.34433662 0.344738743
## K120 0.36493092 0.36493092 0.366432817
## K180 0.29542405 0.29542405 0.294825457
## umid50:K30 0.04343930 0.04343930 0.039402786
## umid62,5:K30 -0.13669277 -0.13669277 -0.139668174
## umid50:K60 0.11559375 0.11559375 0.114460635
## umid62,5:K60 0.09174072 0.09174072 0.089168997
## umid50:K120 0.11859658 0.11859658 0.118496463
## umid62,5:K120 0.16266223 0.16266223 0.158280560
## umid50:K180 0.28832017 0.28832017 0.288513923
## umid62,5:K180 0.21568848 0.21568848 0.213872602
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.19898168 0.01047272 0.04733996
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 25.04696
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
##
## Model 1: restricted model
## Model 2: ngra ~ bloc + umid * K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 63
## 2 55 8 25.047 0.001526 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
X <- LSmatrix(m0, effect = c("umid", "K"))
pred <- attr(X, "grid")
pred <- transform(pred,
K = as.integer(K),
umid = factor(umid))
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
str(pred)
## 'data.frame': 45 obs. of 6 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ umid : Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 1 1 1 1 1 ...
## $ K : int 0 0 0 30 30 30 60 60 60 120 ...
## $ fit : num 116 116 116 156 156 ...
## $ lwr : num 107 102 105 145 140 ...
## $ upr : num 126 131 128 167 173 ...
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.1),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
#-----------------------------------------------------------------------
# Número de grãos por vagem (offset).
xyplot(ngra/nvag ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por vagem",
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid * K,
data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 34.238
## bloc 4 2.0264 69 32.212 1.1637 0.33686
## umid 2 1.7457 67 30.466 2.0050 0.14439
## K 4 3.8324 63 26.634 2.2009 0.08082 .
## umid:K 8 2.6490 55 23.984 0.7606 0.63838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ngra ~ offset(log(nvag)) + bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.52741 -0.46025 0.06782 0.45274 1.05490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.848160 0.029477 28.774 <2e-16 ***
## blocII 0.011313 0.017535 0.645 0.5215
## blocIII 0.036250 0.017615 2.058 0.0444 *
## blocIV 0.019384 0.017940 1.080 0.2846
## blocV 0.016100 0.018448 0.873 0.3866
## umid50 -0.001908 0.037574 -0.051 0.9597
## umid62,5 -0.031173 0.037124 -0.840 0.4047
## K30 0.022867 0.036200 0.632 0.5302
## K60 0.034499 0.035862 0.962 0.3403
## K120 0.035322 0.035703 0.989 0.3268
## K180 0.040586 0.036221 1.121 0.2674
## umid50:K30 -0.018692 0.049385 -0.378 0.7065
## umid62,5:K30 -0.028459 0.049680 -0.573 0.5691
## umid50:K60 -0.048182 0.048583 -0.992 0.3257
## umid62,5:K60 -0.014035 0.048103 -0.292 0.7716
## umid50:K120 -0.030438 0.048367 -0.629 0.5317
## umid62,5:K120 0.045524 0.048714 0.935 0.3541
## umid50:K180 -0.016302 0.048354 -0.337 0.7373
## umid62,5:K180 0.016911 0.048076 0.352 0.7264
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.4353299)
##
## Null deviance: 34.238 on 73 degrees of freedom
## Residual deviance: 23.984 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
# Declara um modelo aditivo.
m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid + K,
data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 34.238
## bloc 4 2.0264 69 32.212 1.2021 0.31880
## umid 2 1.7457 67 30.466 2.0711 0.13455
## K 4 3.8324 63 26.634 2.2734 0.07114 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = ngra, offset = nvag, X = model.matrix(m0)))
# De acordo com a estimativa de phi da Quasi-Poisson, esse dado é
# subdisperso. Já que na verossimilhaça (1 + alpha * y) > -1 quando
# alpha < 0, então o menor valor possível para gamma para ter uma
# log-verossimilhança avaliável é
-1/max(soja$ngra)
## [1] -0.003690037
# Mesmo com esse lower bound, o valor chute para alpha foi definido por
# tentativa erro. O valor -0.003 dá Error e o valor -0.002 na hora de
# perfilhar encontra um mínimo melhor. Então, por tentativa erro
# chegou-se no -0.0026.
start <- c(alpha = -0.0026, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -272.6263 -272.6263
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 12 516.81
## 2 11 545.25 28.448 1 9.627e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.000000000 -0.002108602
## (Intercept) 0.855552235 0.855552234 0.851442205
## blocII 0.011126118 0.011126118 0.008402336
## blocIII 0.036245340 0.036245340 0.035298311
## blocIV 0.018895822 0.018895822 0.017120556
## blocV 0.012659290 0.012659290 0.007123349
## umid50 -0.026631767 -0.026631767 -0.026018732
## umid62,5 -0.026250165 -0.026250165 -0.020361875
## K30 0.007448488 0.007448488 0.008477443
## K60 0.012551823 0.012551823 0.012559113
## K120 0.039757260 0.039757260 0.045813881
## K180 0.041632187 0.041632187 0.046369545
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.49953096 0.04541191 0.11695102
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 14.26048
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## K30 = 0
## K60 = 0
## K120 = 0
## K180 = 0
##
## Model 1: restricted model
## Model 2: ngra ~ offset(log(nvag)) + bloc + umid + K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 67
## 2 63 4 14.261 0.006508 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
# Por causa do offset, não tem como usar a LSmatrix.
pred <- unique(subset(soja, select = c("umid", "K")))
X <- model.matrix(formula(m0)[-2],
data = cbind(nvag = 1, bloc = soja$bloc[1], pred))
i <- grep(x = colnames(X), pattern = "^bloc")
X[, i] <- X[, i] * 0 + 1/(length(i) + 1)
head(X)
## (Intercept) blocII blocIII blocIV blocV umid50 umid62,5 K30 K60
## 1 1 0.2 0.2 0.2 0.2 0 0 0 0
## 2 1 0.2 0.2 0.2 0.2 0 0 1 0
## 3 1 0.2 0.2 0.2 0.2 0 0 0 1
## 4 1 0.2 0.2 0.2 0.2 0 0 0 0
## 5 1 0.2 0.2 0.2 0.2 0 0 0 0
## 6 1 0.2 0.2 0.2 0.2 1 0 0 0
## K120 K180
## 1 0 0
## 2 0 0
## 3 0 0
## 4 1 0
## 5 0 1
## 6 0 0
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
pred$K <- as.numeric(as.character(pred$K))
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.2),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
Experimento conduzido em casa de vegetação para avaliar o efeito da desfolha, em diferentes fases de desenvolvimento do algodão, sobre a produção da cultura. As parcelas foram vasos com duas plantas que tiveram a área das folhas removidas com uma tesoura, simulando o ataque de insetos desfolhadores, nos níveis de 0, 25, 50, 75 e 100% de remoção de área foliar. Em cada fase de desenvolvimento (de 5), 5 parcelas sofreram desfolha nos níveis mencionados. O número de capulhos ao final do experimento foi contado.
#-----------------------------------------------------------------------
# Número de capulhos em função do nível de desfolha artificial e fase
# fenológica do algodoeiro.
data(capdesfo, package = "MRDCr")
str(capdesfo)
## 'data.frame': 125 obs. of 4 variables:
## $ est : Factor w/ 5 levels "vegetativo","botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ des : num 0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
## $ rept: int 1 2 3 4 5 1 2 3 4 5 ...
## $ ncap: int 10 9 8 8 10 11 9 10 10 10 ...
p1 <- xyplot(ncap ~ des | est, data = capdesfo,
col = 1, type = c("p", "smooth"), col.line = "gray50",
layout = c(2, 3), as.table = TRUE,
xlim = extendrange(c(0:1), f = 0.15),
xlab = "Nível de desfolhas artificial",
ylab = "Número de capulho produzidos",
spread = 0.035, panel = panel.beeswarm)
p1
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ncap ~ est * (des + I(des^2)),
data = capdesfo, family = poisson)
m1 <- update(m0, family = quasipoisson)
par(mfrow = c(2, 2))
plot(m0); layout(1)
anova(m0, test = "Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: ncap
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 124 75.514
## est 4 19.9582 120 55.556 0.000509 ***
## des 1 15.8643 119 39.692 6.805e-05 ***
## I(des^2) 1 1.2926 118 38.399 0.255566
## est:des 4 6.7085 114 31.691 0.152119
## est:I(des^2) 4 6.3592 110 25.331 0.173883
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ncap
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 124 75.514
## est 4 19.9582 120 55.556 21.5543 3.775e-13 ***
## des 1 15.8643 119 39.692 68.5317 3.269e-13 ***
## I(des^2) 1 1.2926 118 38.399 5.5839 0.01988 *
## est:des 4 6.7085 114 31.691 7.2450 3.236e-05 ***
## est:I(des^2) 4 6.3592 110 25.331 6.8677 5.674e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ncap ~ est * (des + I(des^2)), family = quasipoisson,
## data = capdesfo)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.07706 -0.30981 -0.02283 0.27044 1.16650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.21424 0.06706 33.018 < 2e-16 ***
## estbotão floral -0.08003 0.09655 -0.829 0.40898
## estflorecimento -0.02723 0.09626 -0.283 0.77781
## estmaça -0.14861 0.09867 -1.506 0.13489
## estcapulho 0.11291 0.09247 1.221 0.22465
## des 0.34859 0.32741 1.065 0.28935
## I(des^2) -0.73843 0.32397 -2.279 0.02458 *
## estbotão floral:des 0.13638 0.46401 0.294 0.76937
## estflorecimento:des -1.58189 0.49140 -3.219 0.00169 **
## estmaça:des 0.47548 0.49046 0.969 0.33444
## estcapulho:des -0.82100 0.45204 -1.816 0.07206 .
## estbotão floral:I(des^2) 0.10435 0.45451 0.230 0.81884
## estflorecimento:I(des^2) 1.40435 0.49033 2.864 0.00501 **
## estmaça:I(des^2) -0.92938 0.49667 -1.871 0.06397 .
## estcapulho:I(des^2) 1.07565 0.44314 2.427 0.01683 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.2314882)
##
## Null deviance: 75.514 on 124 degrees of freedom
## Residual deviance: 25.331 on 110 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizada.
L <- with(capdesfo,
list(y = ncap, offset = 1, X = model.matrix(m0)))
start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)
# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
c(logLik(m2), logLik(m0))
## [1] -254.8414 -254.8414
# Modelo Poisson Generalizado.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
logLik(m3)
## 'log Lik.' -210.6668 (df=16)
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+estbotão floral+
## estflorecimento+estmaça+estcapulho+des+I(des^2)+
## estbotão floral:des+estflorecimento:des+estmaça:des+
## estcapulho:des+estbotão floral:I(des^2)+
## estflorecimento:I(des^2)+estmaça:I(des^2)+
## estcapulho:I(des^2)
## Model 2: m2, [llpgnz]: (Intercept)+estbotão floral+
## estflorecimento+estmaça+estcapulho+des+I(des^2)+
## estbotão floral:des+estflorecimento:des+estmaça:des+
## estcapulho:des+estbotão floral:I(des^2)+
## estflorecimento:I(des^2)+estmaça:I(des^2)+
## estcapulho:I(des^2)
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 16 421.33
## 2 15 509.68 88.349 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = llpgnz, start = start, data = L, vecpar = TRUE)
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## alpha -0.0611259 0.0031211 -19.5847 < 2.2e-16
## (Intercept) 2.2159873 0.0603520 36.7177 < 2.2e-16
## estbotão floral -0.0792916 0.0914575 -0.8670 0.3859545
## estflorecimento -0.0022499 0.0894092 -0.0252 0.9799245
## estmaça -0.1453929 0.1031466 -1.4096 0.1586649
## estcapulho 0.1054861 0.0781060 1.3506 0.1768394
## des 0.3533745 0.3101810 1.1393 0.2545978
## I(des^2) -0.7506691 0.3305526 -2.2710 0.0231499
## estbotão floral:des 0.1116847 0.4465137 0.2501 0.8024899
## estflorecimento:des -1.7775352 0.5328740 -3.3358 0.0008507
## estmaça:des 0.2618588 0.5439359 0.4814 0.6302217
## estcapulho:des -0.7890219 0.4081733 -1.9331 0.0532292
## estbotão floral:I(des^2) 0.1369131 0.4587833 0.2984 0.7653777
## estflorecimento:I(des^2) 1.5933721 0.5624116 2.8331 0.0046098
## estmaça:I(des^2) -0.6273100 0.5754683 -1.0901 0.2756752
## estcapulho:I(des^2) 1.0546017 0.4257783 2.4769 0.0132536
##
## alpha ***
## (Intercept) ***
## estbotão floral
## estflorecimento
## estmaça
## estcapulho
## des
## I(des^2) *
## estbotão floral:des
## estflorecimento:des ***
## estmaça:des
## estcapulho:des .
## estbotão floral:I(des^2)
## estflorecimento:I(des^2) **
## estmaça:I(des^2)
## estcapulho:I(des^2) *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 421.3336
plot(profile(m3, which = "alpha"))
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 -0.061125928
## (Intercept) 2.21424242 2.21424242 2.215987267
## estbotão floral -0.08002756 -0.08002756 -0.079291571
## estflorecimento -0.02722855 -0.02722855 -0.002249855
## estmaça -0.14861263 -0.14861263 -0.145392921
## estcapulho 0.11291268 0.11291268 0.105486116
## des 0.34858564 0.34858564 0.353374487
## I(des^2) -0.73843427 -0.73843427 -0.750669137
## estbotão floral:des 0.13638395 0.13638395 0.111684700
## estflorecimento:des -1.58188650 -1.58188650 -1.777535174
## estmaça:des 0.47548300 0.47548300 0.261858845
## estcapulho:des -0.82100307 -0.82100307 -0.789021942
## estbotão floral:I(des^2) 0.10435096 0.10435096 0.136913065
## estflorecimento:I(des^2) 1.40435178 1.40435178 1.593372095
## estmaça:I(des^2) -0.92937720 -0.92937720 -0.627310018
## estcapulho:I(des^2) 1.07565430 1.07565430 1.054601664
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.22237413 0.01482494 0.04892709
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Teste de Wald explicito.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 19.38121
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário um objeto glm, mas necesse caso ele não usado para nada.
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## estbotão floral:I(des^2) = 0
## estflorecimento:I(des^2) = 0
## estmaça:I(des^2) = 0
## estcapulho:I(des^2) = 0
##
## Model 1: restricted model
## Model 2: ncap ~ est * (des + I(des^2))
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 114
## 2 110 4 19.381 0.0006613 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
pred <- with(capdesfo, expand.grid(est = levels(est),
des = seq(0, 1, by = 0.025)))
X <- model.matrix(formula(m0)[-2], data = pred)
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, est, des, modelo)
key <- list(lines = list(lty = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
key$lines$col <-
trellis.par.get("superpose.line")$col[1:nlevels(pred$modelo)]
p2 <- xyplot(fit ~ des | est, data = pred, groups = modelo,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$des), f = 0.1),
type = "l", key = key,
ly = pred$lwr, uy = pred$upr,
cty = "bands", alpha = 0.35,
prepanel = prepanel.cbH,
panel.groups = panel.cbH,
panel = panel.superpose)
# p2
update(p1, type = "p", layout = c(NA, 1),
key = key, spread = 0.07) +
as.layer(p2, under = TRUE)
#-----------------------------------------------------------------------
data(nematoide, package = "MRDCr")
str(nematoide)
## 'data.frame': 94 obs. of 5 variables:
## $ cult: Factor w/ 19 levels "A","B","C","D",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ mfr : num 10.52 11.03 6.42 8.16 4.48 ...
## $ vol : int 40 40 40 40 40 40 40 40 40 40 ...
## $ nema: int 4 5 3 4 3 2 2 2 2 2 ...
## $ off : num 0.263 0.276 0.161 0.204 0.112 ...
# Número de nematóides por grama de raíz.
plot(nema ~ off, data = nematoide)
# Média do número de nematóides por grama de raíz.
mv <- aggregate(cbind(y = nema/off) ~ cult, data = nematoide,
FUN = function(x) c(m = mean(x), v = var(x)))
xyplot(y[, "v"] ~ y[, "m"], data = mv,
xlab = "Média amostral",
ylab = "Variância amostral") +
layer(panel.abline(a = 0, b = 1, lty = 2))
#-----------------------------------------------------------------------
# Ajuste do Poisson.
m0 <- glm(nema ~ offset(log(off)) + cult,
data = nematoide,
family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Estimativas dos parâmetros.
summary(m1)
##
## Call:
## glm(formula = nema ~ offset(log(off)) + cult, family = quasipoisson,
## data = nematoide)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9455 -0.6693 0.1100 1.1519 4.7603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9293 0.4496 6.515 7.42e-09 ***
## cultB 0.1662 0.7657 0.217 0.828703
## cultC 0.3417 0.6769 0.505 0.615154
## cultD 0.9128 0.6359 1.436 0.155278
## cultE 0.9692 0.6076 1.595 0.114879
## cultF 0.3973 0.6903 0.576 0.566621
## cultG -0.4507 0.7931 -0.568 0.571522
## cultH 0.3218 0.8665 0.371 0.711433
## cultI 0.8808 0.6018 1.464 0.147507
## cultJ 1.1982 0.6359 1.884 0.063390 .
## cultK 1.4511 0.5965 2.433 0.017369 *
## cultL 1.4299 0.5362 2.667 0.009381 **
## cultM 1.6138 0.5507 2.931 0.004482 **
## cultN 1.7743 0.4958 3.579 0.000610 ***
## cultO 1.5776 0.5268 2.995 0.003718 **
## cultP 1.6719 0.5103 3.277 0.001593 **
## cultQ 2.2105 0.4913 4.499 2.45e-05 ***
## cultR 2.2080 0.5103 4.327 4.60e-05 ***
## cultS 1.9155 0.4958 3.863 0.000236 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 3.841169)
##
## Null deviance: 556.73 on 93 degrees of freedom
## Residual deviance: 212.54 on 75 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
# Quadro de deviance.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nema
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 93 556.73
## cult 18 344.19 75 212.54 4.978 3.475e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Ajuste da Poisson Generalizada.
L <- with(nematoide,
list(y = nema, offset = off, X = model.matrix(m0)))
start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)
# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
c(logLik(m2), logLik(m0))
## [1] -274.5007 -274.5007
# Poisson Generalizada.
m3 <- pgnz(formula(m0), data = nematoide)
# Diferença de deviance.
# 2 * diff(c(logLik(m0), logLik(m3)))
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+cultB+cultC+cultD+
## cultE+cultF+cultG+cultH+cultI+cultJ+cultK+cultL+
## cultM+cultN+cultO+cultP+cultQ+cultR+cultS
## Model 2: m2, [llpgnz]: (Intercept)+cultB+cultC+cultD+cultE+
## cultF+cultG+cultH+cultI+cultJ+cultK+cultL+cultM+
## cultN+cultO+cultP+cultQ+cultR+cultS
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 497.89
## 2 19 549.00 51.112 1 8.725e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perfil de log-verossimilhança para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
# Covariância.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 1.06647709 0.05613037 0.16898969
# Gráfico das estimativas.
pars <- data.frame(Pois = c(0, coef(m0)), PGen = coef(m3))
xyplot(PGen ~ Pois, data = pars, aspect = "iso", grid = TRUE) +
layer(panel.abline(a = 0, b = 1, lty = 2))
#-----------------------------------------------------------------------
X <- model.matrix(m0)
# # Predito do número de nematóides observado (considera o offset).
# with(nematoide, {
# cbind(y = nema,
# Pois = nematoide$off * exp(X %*% coef(m0)),
# PGen = nematoide$off * exp(X %*% coef(m1)[-1]))
# })
# Predito do número de nematóides por grama de raíz.
pred <- with(nematoide, {
data.frame(y = nema/off,
Pois = c(exp(X %*% coef(m0))),
PGen = c(exp(X %*% coef(m3)[-1])))
})
str(pred)
## 'data.frame': 94 obs. of 3 variables:
## $ y : num 15.2 18.1 18.7 19.6 26.8 ...
## $ Pois: num 18.7 18.7 18.7 18.7 18.7 ...
## $ PGen: num 19 19 19 19 19 ...
splom(pred) + layer(panel.abline(a = 0, b = 1))
# Correlação predito x observado.
cor(pred)
## y Pois PGen
## y 1.0000000 0.6157455 0.6455661
## Pois 0.6157455 1.0000000 0.9497288
## PGen 0.6455661 0.9497288 1.0000000
# Média das observações de das estimativas por cultivar.
predm <- aggregate(as.matrix(pred) ~ cult, data = nematoide, FUN = mean)
cor(predm[, -1])
## y Pois PGen
## y 1.0000000 0.9374510 0.9824583
## Pois 0.9374510 1.0000000 0.9500745
## PGen 0.9824583 0.9500745 1.0000000
#-----------------------------------------------------------------------
# Predição com intervalos de confiança.
pred <- unique(subset(nematoide, select = cult))
X <- model.matrix(~cult, data = pred)
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, cult, modelo)
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(nema/off ~ cult, data = nematoide,
key = key,
xlab = "Cultivar de feijão",
ylab = "Número de nematóides por grama de raíz") +
as.layer(
xyplot(fit ~ cult, data = pred,
pch = pred$modelo,
ly = pred$lwr, uy = pred$upr,
cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 0.25 * scale(as.integer(pred$modelo),
scale = FALSE),
panel = panel.cbH))
#-----------------------------------------------------------------------
# Resíduos de Pearson.
X <- model.matrix(m0)
# # Resíduos de Pearson no Poisson.
# with(nematoide, {
# y <- nema
# # haty <- fitted(m0)
# haty <- nematoide$off * exp(X %*% coef(m0))
# sdy <- sqrt(haty)
# cbind((y - haty)/sdy,
# residuals(m0, type = "pearson"))
# })
# Resíduos de Pearson do Poisson Generalizado.
rp <- with(nematoide, {
y <- nema
alph <- coef(m3)["alpha"]
haty <- c(nematoide$off * exp(X %*% coef(m3)[-1]))
sdy <- sqrt(haty) * (1 + alph * haty)
(y - haty)/sdy
})
rp <- stack(data.frame(Pois = residuals(m0, type = "pearson"),
PGen = rp))
qqmath(~values | ind, data = rp,
xlab = "Quantis teóricos",
ylab = "Resíduos de Pearson",
panel = function(...) {
panel.qqmathline(...)
panel.qqmath(...)
})
## Atualizado em 23 de maio de 2016.
##
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.4 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=pt_BR.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets
## [7] methods base
##
## other attached packages:
## [1] car_2.1-2 plyr_1.8.3 corrplot_0.77
## [4] bbmle_1.0.18 multcomp_1.4-5 TH.data_1.0-7
## [7] MASS_7.3-45 survival_2.39-4 mvtnorm_1.0-5
## [10] doBy_4.5-15 latticeExtra_0.6-28 RColorBrewer_1.1-2
## [13] lattice_0.20-33 knitr_1.13 MRDCr_0.0-1
## [16] devtools_1.11.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.4 nloptr_1.0.4 compiler_3.3.0
## [4] formatR_1.4 tools_3.3.0 lme4_1.1-12
## [7] testthat_1.0.2 digest_0.6.9 nlme_3.1-128
## [10] memoise_1.0.0 evaluate_0.9 mgcv_1.8-12
## [13] Matrix_1.2-6 parallel_3.3.0 yaml_2.1.13
## [16] SparseM_1.7 withr_1.0.1 stringr_1.0.0
## [19] roxygen2_5.0.1 MatrixModels_0.4-1 nnet_7.3-12
## [22] grid_3.3.0 R6_2.1.2 rmarkdown_0.9.6
## [25] minqa_1.2.4 magrittr_1.5 codetools_0.2-14
## [28] htmltools_0.3.5 splines_3.3.0 pbkrtest_0.4-6
## [31] numDeriv_2014.2-1 quantreg_5.21 sandwich_2.3-4
## [34] stringi_1.0-1 crayon_1.3.1 zoo_1.7-13
Modelos de Regressão para Dados de Contagem com o R |
|
61 RBRAS | Departamento de Estatística - UFPR |
23 a 25 de Maio de 2016 | LEG, PET Estatística |
Salvador - Bahia | github.com/leg-ufpr/MRDCr |