Modelos de Regressão para Dados de Contagem com o R

github.com/leg-ufpr/MRDCr
# Definições da sessão.
# devtools::load_all("../")
library(lattice)
library(latticeExtra)
library(bbmle)
library(corrplot)
library(plyr)
library(car)
library(doBy)
library(multcomp)
library(MRDCr)

Função Densidade

Se uma variável aleatória \(Y\) tem distribuição de probabilidades Poisson generalizada, então sua função de probabilidade, na parametrização de média, é

\[ f(y) = \left( \dfrac{\lambda}{1+\alpha\lambda} \right)^{y} \frac{(1+\alpha y)^{y-1}}{y!} \exp\left\{-\lambda \frac{(1+\alpha y)}{(1+\alpha \lambda)}\right\}, \] em que \(\lambda\) é a média da distribuição e \(\alpha\) é a o parâmetro de dispersão. Nessa parametrização, tem-se

A função de log-verossimilhança é \[ \ell(y; \lambda, \alpha) = \sum_{i=1}^{n} y_{i}\ln(\lambda)- \ln(1+\alpha\lambda)+ (y_{i}-1)\ln(1+\alpha y)- \lambda\frac{(1+\alpha y_{i})}{(1+\alpha\lambda)}- \ln(y_{i}!). \]

grid <- expand.grid(lambda = c(2, 8, 15),
                    alpha = c(-0.05, 0, 0.05))
y <- 0:35

py <- mapply(FUN = dpgnz,
             lambda = grid$lambda,
             alpha = grid$alpha,
             MoreArgs = list(y = y), SIMPLIFY = FALSE)
grid <- cbind(grid[rep(1:nrow(grid), each = length(y)), ],
              y = y,
              py = unlist(py))

useOuterStrips(xyplot(py ~ y | factor(lambda) + factor(alpha),
                      ylab = expression(f(y)),
                      xlab = expression(y),
                      data = grid, type = "h",
                      panel = function(x, y, ...) {
                          m <- sum(x * y)
                          panel.xyplot(x, y, ...)
                          panel.abline(v = m, lty = 2)
                      }),
               strip = strip.custom(
                   strip.names = TRUE,
                   var.name = expression(lambda == ""),
                   sep = ""),
               strip.left = strip.custom(
                   strip.names = TRUE,
                   var.name = expression(alpha == ""),
                   sep = ""))

#-----------------------------------------------------------------------
# Relação média variância.

curve(lambda * (1 + 0 * lambda)^2,
      from = 0, to = 10, xname = "lambda",
      ylab = expression(lambda %.% (1 + alpha %.% lambda)^2),
      xlab = expression(lambda))
alpha <- seq(-0.25, 0.25, by = 0.025)
col <- brewer.pal(n = 5, name = "Spectral")
col <- colorRampPalette(colors = col)(length(alpha))
for (a in seq_along(alpha)) {
    curve(lambda * (1 + alpha[a] * lambda)^2,
          add = TRUE, xname = "lambda", col = col[a], lwd = 2)
}

Modelo de Regressão com a Distribuição Poisson Generalizada

#-----------------------------------------------------------------------
# Gráfico do espaço paramétrico de lambda x alpha.

y <- 0:400

fun <- Vectorize(vectorize.args = c("lambda", "alpha"),
                 FUN = function(lambda, alpha) {
                     sum(dpgnz(y = y, lambda = lambda, alpha = alpha))
                 })

grid <- list(lambda = seq(0.2, 50, by = 0.2),
             alpha = seq(-0.98, 0.98, by = 0.02))
grid$sum <- with(grid, outer(lambda, alpha, fun))

grid <- with(grid,
             cbind(expand.grid(lambda = lambda, alpha = alpha),
                   data.frame(sum = c(sum))))

levelplot(sum ~ lambda + alpha,
          data = subset(grid, round(sum, 3) == 1),
          col.regions = gray.colors) +
    layer(panel.abline(h = 0, lty = 2)) +
    layer(panel.curve(-1/x))

# Já que lambda * alpha > -1, então alpha = -1/lambda dá a fronteira.

Verossimilhança e Estimação

# Função de log-Verossimilhança da Poisson Generalizada na
# parametrização de modelo de regressão.
MRDCr::llpgnz
## function(params, y, X, offset = NULL) {
##     # params: vetor de parâmetros;
##     #   params[1]: parâmetro de dispersão (alpha);
##     #   params[-1]: parâmetro de locação (lambda);
##     # y: variável resposta (contagem);
##     # X: matriz do modelo linear;
##     # offset: tamanho do domínio onde y foi medido (exposição);
##     #----------------------------------------
##     if (is.null(offset)) {
##         offset <- 1L
##     }
##     alpha <- params[1]
##     lambda <- offset * exp(X %*% params[-1])
##     z <- 1 + alpha * lambda
##     w <- 1 + alpha * y
##     ll <- y * (log(lambda) - log(z)) +
##         (y - 1) * log(w) -
##         lambda * (w/z) -
##         lfactorial(y)
##     # Negativo da log-likelihood.
##     -sum(ll)
## }
## <environment: namespace:MRDCr>
#-----------------------------------------------------------------------
# Gerando uma amostra aleatória da Poisson, para teste.

# Offset = 2, lambda = 3.
y <- rpois(100, lambda = 2 * 3)

L <- list(y = y,
          offset = rep(2, length(y)),
          X = cbind(rep(1, length(y))))

start <- c(alpha = 0, lambda = 1)
parnames(llpgnz) <- names(start)

# Como \alpha foi fixado em 1, essa ll corresponde à Poisson.
n0 <- mle2(minuslogl = llpgnz,
           start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

# Para conferir.
c(coef(n0)["lambda"],
  coef(glm(y ~ offset(log(L$offset)), family = poisson)))
##      lambda (Intercept) 
##    1.095273    1.095273
# Estimando o \alpha.
n1 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
coef(n1)
##       alpha      lambda 
## 0.005829447 1.095255553
# Perfil de verossimilhança dos parâmetros.
plot(profile(n1))

# Covariância.
V <- cov2cor(vcov(n1))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1

Número de Vagens Produzidas em Soja

Resultados de um experimento fatorial (3 x 5), em delineamento de blocos casualizados, que estudou a influência de níveis de potássio na adubação de soja em combinação com irrigação em casa de vegetação. As variáveis de contagem registradas nesse experimento foram o número de vagens viáveis (e não viáveis) e o número total de sementes por parcela com duas plantas de soja.

#-----------------------------------------------------------------------
# Carregando e explorando os dados.

data(soja, package = "MRDCr")
str(soja)
## 'data.frame':    75 obs. of  5 variables:
##  $ K   : int  0 30 60 120 180 0 30 60 120 180 ...
##  $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 2 2 2 2 2 ...
##  $ bloc: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ngra: int  136 159 156 171 190 140 193 200 208 237 ...
##  $ nvag: int  56 62 66 68 82 63 86 94 86 97 ...
# A observação 74 é um outlier.
soja <- soja[-74, ]

xyplot(nvag ~ K | umid, data = soja, layout = c(NA, 1),
       type = c("p", "smooth"),
       ylab = "Número de vagens por parcela",
       xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
       strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))

soja <- transform(soja, K = factor(K))

#-----------------------------------------------------------------------
# Modelo Poisson.

m0 <- glm(nvag ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)

# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)

# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: nvag
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                      73     322.68                      
## bloc    4   14.284        69     308.40  2.9785   0.02684 *  
## umid    2   92.911        67     215.49 38.7485 3.157e-11 ***
## K       4  136.060        63      79.43 28.3719 8.316e-13 ***
## umid:K  8   14.150        55      65.28  1.4754   0.18750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## 
## Call:
## glm(formula = nvag ~ bloc + umid * K, family = quasipoisson, 
##     data = soja)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9681  -0.6393  -0.0195   0.4854   2.3306  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.95369    0.07547  52.389  < 2e-16 ***
## blocII        -0.02928    0.04479  -0.654 0.516036    
## blocIII       -0.07265    0.04529  -1.604 0.114420    
## blocIV        -0.12544    0.04592  -2.732 0.008457 ** 
## blocV         -0.10795    0.04705  -2.294 0.025606 *  
## umid50         0.13404    0.09597   1.397 0.168117    
## umid62,5       0.21656    0.09418   2.299 0.025303 *  
## K30            0.27427    0.09300   2.949 0.004670 ** 
## K60            0.30797    0.09233   3.336 0.001530 ** 
## K120           0.32883    0.09192   3.577 0.000734 ***
## K180           0.25540    0.09338   2.735 0.008376 ** 
## umid50:K30     0.06322    0.12654   0.500 0.619324    
## umid62,5:K30  -0.10747    0.12631  -0.851 0.398532    
## umid50:K60     0.16561    0.12449   1.330 0.188897    
## umid62,5:K60   0.10735    0.12286   0.874 0.386028    
## umid50:K120    0.14920    0.12414   1.202 0.234563    
## umid62,5:K120  0.11839    0.12487   0.948 0.347229    
## umid50:K180    0.30370    0.12439   2.441 0.017873 *  
## umid62,5:K180  0.19838    0.12325   1.610 0.113208    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 1.198902)
## 
##     Null deviance: 322.684  on 73  degrees of freedom
## Residual deviance:  65.278  on 55  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.

L <- with(soja,
          list(y = nvag, offset = 1, X = model.matrix(m0)))

# Usa as estimativas do Poisson como valores iniciais para a PGen.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)

# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -259.6165 -259.6165
cbind(coef(m2)[-1], coef(m0))
##                      [,1]        [,2]
## (Intercept)    3.95368723  3.95368724
## blocII        -0.02927855 -0.02927854
## blocIII       -0.07265048 -0.07265048
## blocIV        -0.12543798 -0.12543798
## blocV         -0.10794857 -0.10794857
## umid50         0.13404356  0.13404356
## umid62,5       0.21656458  0.21656458
## K30            0.27427290  0.27427290
## K60            0.30796674  0.30796674
## K120           0.32883188  0.32883188
## K180           0.25540441  0.25540441
## umid50:K30     0.06322288  0.06322288
## umid62,5:K30  -0.10747272 -0.10747272
## umid50:K60     0.16561471  0.16561471
## umid62,5:K60   0.10735066  0.10735066
## umid50:K120    0.14920392  0.14920392
## umid62,5:K120  0.11838578  0.11838578
## umid50:K180    0.30369921  0.30369921
## umid62,5:K180  0.19837927  0.19837927
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)

# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
##           blocV+umid50+umid62,5+K30+K60+K120+K180+
##           umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
##           umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
##           umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
##           umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
##           umid62,5:K120+umid50:K180+umid62,5:K180
##   Tot Df Deviance  Chisq Df Pr(>Chisq)
## 1     20   518.19                     
## 2     19   519.23 1.0402  1     0.3078
# Estimativas dos coeficientes.
cbind("PoissonGLM" = c(NA, coef(m0)),
      "PoissonML" = coef(m2),
      "PGeneraliz" = coef(m3))
##                PoissonGLM   PoissonML   PGeneraliz
##                        NA  0.00000000 -0.001042601
## (Intercept)    3.95368724  3.95368723  3.953181451
## blocII        -0.02927854 -0.02927855 -0.027923784
## blocIII       -0.07265048 -0.07265048 -0.072056920
## blocIV        -0.12543798 -0.12543798 -0.127982907
## blocV         -0.10794857 -0.10794857 -0.105466353
## umid50         0.13404356  0.13404356  0.134139707
## umid62,5       0.21656458  0.21656458  0.216194659
## K30            0.27427290  0.27427290  0.273827867
## K60            0.30796674  0.30796674  0.307818797
## K120           0.32883188  0.32883188  0.328229190
## K180           0.25540441  0.25540441  0.255998709
## umid50:K30     0.06322288  0.06322288  0.064744844
## umid62,5:K30  -0.10747272 -0.10747272 -0.106284339
## umid50:K60     0.16561471  0.16561471  0.166265663
## umid62,5:K60   0.10735066  0.10735066  0.108202657
## umid50:K120    0.14920392  0.14920392  0.149064097
## umid62,5:K120  0.11838578  0.11838578  0.120194391
## umid50:K180    0.30369921  0.30369921  0.302905099
## umid62,5:K180  0.19837927  0.19837927  0.198624027
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)

V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
##        sum       mean        max 
## 0.30139729 0.01586302 0.06988971
#-----------------------------------------------------------------------
# Testes de hipótese.

# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))

# Cálculo da estatística Chi-quadrado.
# t(L %*% coef(m3)) %*%
#     solve(L %*% vcov(m3) %*% t(L)) %*%
#     (L %*% coef(m3))
crossprod(L %*% coef(m3),
          solve(L %*% vcov(m3) %*% t(L),
                L %*% coef(m3)))
##          [,1]
## [1,] 16.21861
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário passar um objeto glm mesmo fornecendo o restante a parte.
linearHypothesis(model = m0,
                 hypothesis.matrix = L,
                 vcov. = vcov(m3),
                 coef. = coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
## 
## Model 1: restricted model
## Model 2: nvag ~ bloc + umid * K
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1     63                       
## 2     55  8 16.219    0.03936 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.

X <- LSmatrix(m0, effect = c("umid", "K"))

pred <- attr(X, "grid")
pred <- transform(pred,
                  K = as.integer(K),
                  umid = factor(umid))
pred <- list(pois = pred, pgen = pred)

# Preditos pela Poisson.
# aux <- predict(m0, newdata = pred$pois, se.fit = TRUE)
# aux <- exp(aux$fit + outer(aux$se.fit, qn, FUN = "*"))
# pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
str(pred$pois)
## 'data.frame':    15 obs. of  5 variables:
##  $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 2 3 1 2 3 1 2 3 1 ...
##  $ K   : int  0 0 0 30 30 30 60 60 60 120 ...
##  $ fit : num  48.7 55.7 60.5 64.1 78.1 ...
##  $ lwr : num  43 49.6 54.1 57.5 70.7 ...
##  $ upr : num  55.3 62.7 67.7 71.5 86.3 ...
# Predito para a Poisson Generalizada.
aux <- predict(m3, newdata = X,
               interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])

pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)

key <- list(type = "o", divide = 1,
            lines = list(pch = 1:nlevels(pred$modelo),
                         lty = 1, col = 1),
            text = list(c("Poisson", "Poisson Generelizada")))

xyplot(fit ~ K | umid, data = pred,
       layout = c(NA, 1), as.table = TRUE,
       xlim = extendrange(range(pred$K), f = 0.1),
       key = key, pch = pred$modelo,
       xlab = expression("Dose de potássio"~(mg~dm^{-3})),
       ylab = "Número de vagens por parcela",
       ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
       prepanel = prepanel.cbH,
       desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
       panel = panel.cbH)

Número de Grãos Produzidas em Soja

Análise do número de grãos por pacela do experimento com soja.

#-----------------------------------------------------------------------

xyplot(ngra ~ K | umid, data = soja, layout = c(NA, 1),
       type = c("p", "smooth"),
       ylab = "Número de grãos por parcela",
       xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
       strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))

#-----------------------------------------------------------------------
# Modelo Poisson.

m0 <- glm(ngra ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)

# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)

# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ngra
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                      73     761.43             
## bloc    4    28.34        69     733.09  0.01471 *  
## umid    2   185.06        67     548.03  < 2e-16 ***
## K       4   380.32        63     167.71  < 2e-16 ***
## umid:K  8    42.99        55     124.72  0.01606 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## 
## Call:
## glm(formula = ngra ~ bloc + umid * K, family = quasipoisson, 
##     data = soja)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6297  -1.0707  -0.0873   0.7428   3.2810  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.80196    0.06773  70.898  < 2e-16 ***
## blocII        -0.01939    0.04017  -0.483 0.631230    
## blocIII       -0.03663    0.04035  -0.908 0.367964    
## blocIV        -0.10559    0.04107  -2.571 0.012884 *  
## blocV         -0.09313    0.04218  -2.208 0.031433 *  
## umid50         0.13245    0.08611   1.538 0.129739    
## umid62,5       0.18548    0.08506   2.180 0.033517 *  
## K30            0.29799    0.08299   3.591 0.000703 ***
## K60            0.34434    0.08218   4.190 0.000102 ***
## K120           0.36493    0.08183   4.459  4.1e-05 ***
## K180           0.29542    0.08303   3.558 0.000778 ***
## umid50:K30     0.04344    0.11318   0.384 0.702600    
## umid62,5:K30  -0.13669    0.11386  -1.201 0.235081    
## umid50:K60     0.11559    0.11136   1.038 0.303817    
## umid62,5:K60   0.09174    0.11027   0.832 0.409039    
## umid50:K120    0.11860    0.11088   1.070 0.289461    
## umid62,5:K120  0.16266    0.11145   1.460 0.150096    
## umid50:K180    0.28832    0.11085   2.601 0.011917 *  
## umid62,5:K180  0.21569    0.11021   1.957 0.055430 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 2.28854)
## 
##     Null deviance: 761.43  on 73  degrees of freedom
## Residual deviance: 124.72  on 55  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.

L <- with(soja,
          list(y = ngra, offset = 1, X = model.matrix(m0)))

# Usa as estimativas do Poisson como valores iniciais.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)

# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -321.6692 -321.6692
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)

# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
##           blocV+umid50+umid62,5+K30+K60+K120+K180+
##           umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
##           umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
##           umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
##           umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
##           umid62,5:K120+umid50:K180+umid62,5:K180
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     20   630.79                         
## 2     19   643.34 12.544  1  0.0003975 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimaitvas dos parâmetros.
cbind("PoissonGLM" = c(NA, coef(m0)),
      "PoissonML" = coef(m2),
      "PGeneraliz" = coef(m3))
##                PoissonGLM   PoissonML   PGeneraliz
##                        NA  0.00000000  0.001778618
## (Intercept)    4.80195973  4.80195972  4.802081183
## blocII        -0.01939070 -0.01939070 -0.022097221
## blocIII       -0.03662632 -0.03662632 -0.037831855
## blocIV        -0.10559332 -0.10559332 -0.099023604
## blocV         -0.09313375 -0.09313375 -0.098406090
## umid50         0.13245136  0.13245136  0.133402155
## umid62,5       0.18548293  0.18548293  0.187347770
## K30            0.29799144  0.29799144  0.299365942
## K60            0.34433662  0.34433662  0.344738743
## K120           0.36493092  0.36493092  0.366432817
## K180           0.29542405  0.29542405  0.294825457
## umid50:K30     0.04343930  0.04343930  0.039402786
## umid62,5:K30  -0.13669277 -0.13669277 -0.139668174
## umid50:K60     0.11559375  0.11559375  0.114460635
## umid62,5:K60   0.09174072  0.09174072  0.089168997
## umid50:K120    0.11859658  0.11859658  0.118496463
## umid62,5:K120  0.16266223  0.16266223  0.158280560
## umid50:K180    0.28832017  0.28832017  0.288513923
## umid62,5:K180  0.21568848  0.21568848  0.213872602
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)

V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
##        sum       mean        max 
## 0.19898168 0.01047272 0.04733996
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))

# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
          solve(L %*% vcov(m3) %*% t(L),
                L %*% coef(m3)))
##          [,1]
## [1,] 25.04696
linearHypothesis(model = m0,
                 hypothesis.matrix = L,
                 vcov. = vcov(m3),
                 coef. = coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
## 
## Model 1: restricted model
## Model 2: ngra ~ bloc + umid * K
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)   
## 1     63                        
## 2     55  8 25.047   0.001526 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.

X <- LSmatrix(m0, effect = c("umid", "K"))

pred <- attr(X, "grid")
pred <- transform(pred,
                  K = as.integer(K),
                  umid = factor(umid))
pred <- list(pois = pred, quasi = pred, pgen = pred)

# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
               interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])

# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
str(pred)
## 'data.frame':    45 obs. of  6 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ umid  : Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ K     : int  0 0 0 30 30 30 60 60 60 120 ...
##  $ fit   : num  116 116 116 156 156 ...
##  $ lwr   : num  107 102 105 145 140 ...
##  $ upr   : num  126 131 128 167 173 ...
key <- list(type = "o", divide = 1,
            lines = list(pch = 1:nlevels(pred$modelo),
                         lty = 1, col = 1),
            text = list(c("Poisson", "Quasi-Poisson",
                          "Poisson Generelizada")))

xyplot(fit ~ K | umid, data = pred,
       layout = c(NA, 1), as.table = TRUE,
       xlim = extendrange(range(pred$K), f = 0.1),
       key = key, pch = pred$modelo,
       xlab = expression("Dose de potássio"~(mg~dm^{-3})),
       ylab = "Número de grãos por parcela",
       ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
       prepanel = prepanel.cbH,
       desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
       panel = panel.cbH)

Número de Grãos por Vagem

#-----------------------------------------------------------------------
# Número de grãos por vagem (offset).

xyplot(ngra/nvag ~ K | umid, data = soja, layout = c(NA, 1),
       type = c("p", "smooth"),
       xlab = expression("Dose de potássio"~(mg~dm^{-3})),
       ylab = "Número de grãos por vagem",
       strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))

#-----------------------------------------------------------------------
# Modelo Poisson.

m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid * K,
          data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)

# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)

# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ngra
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev      F  Pr(>F)  
## NULL                      73     34.238                 
## bloc    4   2.0264        69     32.212 1.1637 0.33686  
## umid    2   1.7457        67     30.466 2.0050 0.14439  
## K       4   3.8324        63     26.634 2.2009 0.08082 .
## umid:K  8   2.6490        55     23.984 0.7606 0.63838  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## 
## Call:
## glm(formula = ngra ~ offset(log(nvag)) + bloc + umid * K, family = quasipoisson, 
##     data = soja)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.52741  -0.46025   0.06782   0.45274   1.05490  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.848160   0.029477  28.774   <2e-16 ***
## blocII         0.011313   0.017535   0.645   0.5215    
## blocIII        0.036250   0.017615   2.058   0.0444 *  
## blocIV         0.019384   0.017940   1.080   0.2846    
## blocV          0.016100   0.018448   0.873   0.3866    
## umid50        -0.001908   0.037574  -0.051   0.9597    
## umid62,5      -0.031173   0.037124  -0.840   0.4047    
## K30            0.022867   0.036200   0.632   0.5302    
## K60            0.034499   0.035862   0.962   0.3403    
## K120           0.035322   0.035703   0.989   0.3268    
## K180           0.040586   0.036221   1.121   0.2674    
## umid50:K30    -0.018692   0.049385  -0.378   0.7065    
## umid62,5:K30  -0.028459   0.049680  -0.573   0.5691    
## umid50:K60    -0.048182   0.048583  -0.992   0.3257    
## umid62,5:K60  -0.014035   0.048103  -0.292   0.7716    
## umid50:K120   -0.030438   0.048367  -0.629   0.5317    
## umid62,5:K120  0.045524   0.048714   0.935   0.3541    
## umid50:K180   -0.016302   0.048354  -0.337   0.7373    
## umid62,5:K180  0.016911   0.048076   0.352   0.7264    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.4353299)
## 
##     Null deviance: 34.238  on 73  degrees of freedom
## Residual deviance: 23.984  on 55  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 3
# Declara um modelo aditivo.
m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid + K,
          data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
anova(m1, test = "F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ngra
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F  Pr(>F)  
## NULL                    73     34.238                 
## bloc  4   2.0264        69     32.212 1.2021 0.31880  
## umid  2   1.7457        67     30.466 2.0711 0.13455  
## K     4   3.8324        63     26.634 2.2734 0.07114 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.

L <- with(soja,
          list(y = ngra, offset = nvag, X = model.matrix(m0)))

# De acordo com a estimativa de phi da Quasi-Poisson, esse dado é
# subdisperso. Já que na verossimilhaça (1 + alpha * y) > -1 quando
# alpha < 0, então o menor valor possível para gamma para ter uma
# log-verossimilhança avaliável é
-1/max(soja$ngra)
## [1] -0.003690037
# Mesmo com esse lower bound, o valor chute para alpha foi definido por
# tentativa erro. O valor -0.003 dá Error e o valor -0.002 na hora de
# perfilhar encontra um mínimo melhor. Então, por tentativa erro
# chegou-se no -0.0026.
start <- c(alpha = -0.0026, coef(m0))
parnames(llpgnz) <- names(start)

# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -272.6263 -272.6263
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)

# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
##           blocV+umid50+umid62,5+K30+K60+K120+K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
##           umid50+umid62,5+K30+K60+K120+K180
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     12   516.81                         
## 2     11   545.25 28.448  1  9.627e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cbind("PoissonGLM" = c(NA, coef(m0)),
      "PoissonML" = coef(m2),
      "PGeneraliz" = coef(m3))
##               PoissonGLM    PoissonML   PGeneraliz
##                       NA  0.000000000 -0.002108602
## (Intercept)  0.855552235  0.855552234  0.851442205
## blocII       0.011126118  0.011126118  0.008402336
## blocIII      0.036245340  0.036245340  0.035298311
## blocIV       0.018895822  0.018895822  0.017120556
## blocV        0.012659290  0.012659290  0.007123349
## umid50      -0.026631767 -0.026631767 -0.026018732
## umid62,5    -0.026250165 -0.026250165 -0.020361875
## K30          0.007448488  0.007448488  0.008477443
## K60          0.012551823  0.012551823  0.012559113
## K120         0.039757260  0.039757260  0.045813881
## K180         0.041632187  0.041632187  0.046369545
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))

V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
##        sum       mean        max 
## 0.49953096 0.04541191 0.11695102
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))

# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
          solve(L %*% vcov(m3) %*% t(L),
                L %*% coef(m3)))
##          [,1]
## [1,] 14.26048
linearHypothesis(model = m0,
                 hypothesis.matrix = L,
                 vcov. = vcov(m3),
                 coef. = coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## K30 = 0
## K60 = 0
## K120 = 0
## K180 = 0
## 
## Model 1: restricted model
## Model 2: ngra ~ offset(log(nvag)) + bloc + umid + K
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)   
## 1     67                        
## 2     63  4 14.261   0.006508 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.

# Por causa do offset, não tem como usar a LSmatrix.
pred <- unique(subset(soja, select = c("umid", "K")))

X <- model.matrix(formula(m0)[-2],
                  data = cbind(nvag = 1, bloc = soja$bloc[1], pred))

i <- grep(x = colnames(X), pattern = "^bloc")
X[, i] <- X[, i] * 0 + 1/(length(i) + 1)
head(X)
##   (Intercept) blocII blocIII blocIV blocV umid50 umid62,5 K30 K60
## 1           1    0.2     0.2    0.2   0.2      0        0   0   0
## 2           1    0.2     0.2    0.2   0.2      0        0   1   0
## 3           1    0.2     0.2    0.2   0.2      0        0   0   1
## 4           1    0.2     0.2    0.2   0.2      0        0   0   0
## 5           1    0.2     0.2    0.2   0.2      0        0   0   0
## 6           1    0.2     0.2    0.2   0.2      1        0   0   0
##   K120 K180
## 1    0    0
## 2    0    0
## 3    0    0
## 4    1    0
## 5    0    1
## 6    0    0
pred <- list(pois = pred, quasi = pred, pgen = pred)

# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
               interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])

# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
pred$K <- as.numeric(as.character(pred$K))

key <- list(type = "o", divide = 1,
            lines = list(pch = 1:nlevels(pred$modelo),
                         lty = 1, col = 1),
            text = list(c("Poisson", "Quasi-Poisson",
                          "Poisson Generelizada")))

xyplot(fit ~ K | umid, data = pred,
       layout = c(NA, 1), as.table = TRUE,
       xlim = extendrange(range(pred$K), f = 0.2),
       key = key, pch = pred$modelo,
       xlab = expression("Dose de potássio"~(mg~dm^{-3})),
       ylab = "Número de grãos por parcela",
       ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
       prepanel = prepanel.cbH,
       desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
       panel = panel.cbH)

Número de Capulhos Produzidos em Algodão

Experimento conduzido em casa de vegetação para avaliar o efeito da desfolha, em diferentes fases de desenvolvimento do algodão, sobre a produção da cultura. As parcelas foram vasos com duas plantas que tiveram a área das folhas removidas com uma tesoura, simulando o ataque de insetos desfolhadores, nos níveis de 0, 25, 50, 75 e 100% de remoção de área foliar. Em cada fase de desenvolvimento (de 5), 5 parcelas sofreram desfolha nos níveis mencionados. O número de capulhos ao final do experimento foi contado.

#-----------------------------------------------------------------------
# Número de capulhos em função do nível de desfolha artificial e fase
# fenológica do algodoeiro.

data(capdesfo, package = "MRDCr")
str(capdesfo)
## 'data.frame':    125 obs. of  4 variables:
##  $ est : Factor w/ 5 levels "vegetativo","botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ des : num  0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
##  $ rept: int  1 2 3 4 5 1 2 3 4 5 ...
##  $ ncap: int  10 9 8 8 10 11 9 10 10 10 ...
p1 <- xyplot(ncap ~ des | est, data = capdesfo,
             col = 1, type = c("p", "smooth"), col.line = "gray50",
             layout = c(2, 3), as.table = TRUE,
             xlim = extendrange(c(0:1), f = 0.15),
             xlab = "Nível de desfolhas artificial",
             ylab = "Número de capulho produzidos",
             spread = 0.035, panel = panel.beeswarm)
p1

#-----------------------------------------------------------------------
# Modelo Poisson.

m0 <- glm(ncap ~ est * (des + I(des^2)),
          data = capdesfo, family = poisson)
m1 <- update(m0, family = quasipoisson)

par(mfrow = c(2, 2))
plot(m0); layout(1)

anova(m0, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: ncap
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           124     75.514              
## est           4  19.9582       120     55.556  0.000509 ***
## des           1  15.8643       119     39.692 6.805e-05 ***
## I(des^2)      1   1.2926       118     38.399  0.255566    
## est:des       4   6.7085       114     31.691  0.152119    
## est:I(des^2)  4   6.3592       110     25.331  0.173883    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test = "F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ncap
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                           124     75.514                      
## est           4  19.9582       120     55.556 21.5543 3.775e-13 ***
## des           1  15.8643       119     39.692 68.5317 3.269e-13 ***
## I(des^2)      1   1.2926       118     38.399  5.5839   0.01988 *  
## est:des       4   6.7085       114     31.691  7.2450 3.236e-05 ***
## est:I(des^2)  4   6.3592       110     25.331  6.8677 5.674e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## 
## Call:
## glm(formula = ncap ~ est * (des + I(des^2)), family = quasipoisson, 
##     data = capdesfo)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.07706  -0.30981  -0.02283   0.27044   1.16650  
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               2.21424    0.06706  33.018  < 2e-16 ***
## estbotão floral          -0.08003    0.09655  -0.829  0.40898    
## estflorecimento          -0.02723    0.09626  -0.283  0.77781    
## estmaça                  -0.14861    0.09867  -1.506  0.13489    
## estcapulho                0.11291    0.09247   1.221  0.22465    
## des                       0.34859    0.32741   1.065  0.28935    
## I(des^2)                 -0.73843    0.32397  -2.279  0.02458 *  
## estbotão floral:des       0.13638    0.46401   0.294  0.76937    
## estflorecimento:des      -1.58189    0.49140  -3.219  0.00169 ** 
## estmaça:des               0.47548    0.49046   0.969  0.33444    
## estcapulho:des           -0.82100    0.45204  -1.816  0.07206 .  
## estbotão floral:I(des^2)  0.10435    0.45451   0.230  0.81884    
## estflorecimento:I(des^2)  1.40435    0.49033   2.864  0.00501 ** 
## estmaça:I(des^2)         -0.92938    0.49667  -1.871  0.06397 .  
## estcapulho:I(des^2)       1.07565    0.44314   2.427  0.01683 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.2314882)
## 
##     Null deviance: 75.514  on 124  degrees of freedom
## Residual deviance: 25.331  on 110  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizada.

L <- with(capdesfo,
          list(y = ncap, offset = 1, X = model.matrix(m0)))

start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)

# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

c(logLik(m2), logLik(m0))
## [1] -254.8414 -254.8414
# Modelo Poisson Generalizado.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
logLik(m3)
## 'log Lik.' -210.6668 (df=16)
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+estbotão floral+
##           estflorecimento+estmaça+estcapulho+des+I(des^2)+
##           estbotão floral:des+estflorecimento:des+estmaça:des+
##           estcapulho:des+estbotão floral:I(des^2)+
##           estflorecimento:I(des^2)+estmaça:I(des^2)+
##           estcapulho:I(des^2)
## Model 2: m2, [llpgnz]: (Intercept)+estbotão floral+
##           estflorecimento+estmaça+estcapulho+des+I(des^2)+
##           estbotão floral:des+estflorecimento:des+estmaça:des+
##           estcapulho:des+estbotão floral:I(des^2)+
##           estflorecimento:I(des^2)+estmaça:I(des^2)+
##           estcapulho:I(des^2)
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     16   421.33                         
## 2     15   509.68 88.349  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3)
## Maximum likelihood estimation
## 
## Call:
## mle2(minuslogl = llpgnz, start = start, data = L, vecpar = TRUE)
## 
## Coefficients:
##                            Estimate Std. Error  z value     Pr(z)
## alpha                    -0.0611259  0.0031211 -19.5847 < 2.2e-16
## (Intercept)               2.2159873  0.0603520  36.7177 < 2.2e-16
## estbotão floral          -0.0792916  0.0914575  -0.8670 0.3859545
## estflorecimento          -0.0022499  0.0894092  -0.0252 0.9799245
## estmaça                  -0.1453929  0.1031466  -1.4096 0.1586649
## estcapulho                0.1054861  0.0781060   1.3506 0.1768394
## des                       0.3533745  0.3101810   1.1393 0.2545978
## I(des^2)                 -0.7506691  0.3305526  -2.2710 0.0231499
## estbotão floral:des       0.1116847  0.4465137   0.2501 0.8024899
## estflorecimento:des      -1.7775352  0.5328740  -3.3358 0.0008507
## estmaça:des               0.2618588  0.5439359   0.4814 0.6302217
## estcapulho:des           -0.7890219  0.4081733  -1.9331 0.0532292
## estbotão floral:I(des^2)  0.1369131  0.4587833   0.2984 0.7653777
## estflorecimento:I(des^2)  1.5933721  0.5624116   2.8331 0.0046098
## estmaça:I(des^2)         -0.6273100  0.5754683  -1.0901 0.2756752
## estcapulho:I(des^2)       1.0546017  0.4257783   2.4769 0.0132536
##                             
## alpha                    ***
## (Intercept)              ***
## estbotão floral             
## estflorecimento             
## estmaça                     
## estcapulho                  
## des                         
## I(des^2)                 *  
## estbotão floral:des         
## estflorecimento:des      ***
## estmaça:des                 
## estcapulho:des           .  
## estbotão floral:I(des^2)    
## estflorecimento:I(des^2) ** 
## estmaça:I(des^2)            
## estcapulho:I(des^2)      *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 421.3336
plot(profile(m3, which = "alpha"))

cbind("PoissonGLM" = c(NA, coef(m0)),
      "PoissonML" = coef(m2),
      "PGeneraliz" = coef(m3))
##                           PoissonGLM   PoissonML   PGeneraliz
##                                   NA  0.00000000 -0.061125928
## (Intercept)               2.21424242  2.21424242  2.215987267
## estbotão floral          -0.08002756 -0.08002756 -0.079291571
## estflorecimento          -0.02722855 -0.02722855 -0.002249855
## estmaça                  -0.14861263 -0.14861263 -0.145392921
## estcapulho                0.11291268  0.11291268  0.105486116
## des                       0.34858564  0.34858564  0.353374487
## I(des^2)                 -0.73843427 -0.73843427 -0.750669137
## estbotão floral:des       0.13638395  0.13638395  0.111684700
## estflorecimento:des      -1.58188650 -1.58188650 -1.777535174
## estmaça:des               0.47548300  0.47548300  0.261858845
## estcapulho:des           -0.82100307 -0.82100307 -0.789021942
## estbotão floral:I(des^2)  0.10435096  0.10435096  0.136913065
## estflorecimento:I(des^2)  1.40435178  1.40435178  1.593372095
## estmaça:I(des^2)         -0.92937720 -0.92937720 -0.627310018
## estcapulho:I(des^2)       1.07565430  1.07565430  1.054601664
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
##        sum       mean        max 
## 0.22237413 0.01482494 0.04892709
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))

# Teste de Wald explicito.
crossprod(L %*% coef(m3),
          solve(L %*% vcov(m3) %*% t(L),
                L %*% coef(m3)))
##          [,1]
## [1,] 19.38121
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário um objeto glm, mas necesse caso ele não usado para nada.
linearHypothesis(model = m0,
                 hypothesis.matrix = L,
                 vcov. = vcov(m3),
                 coef. = coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## estbotão floral:I(des^2) = 0
## estflorecimento:I(des^2) = 0
## estmaça:I(des^2) = 0
## estcapulho:I(des^2) = 0
## 
## Model 1: restricted model
## Model 2: ncap ~ est * (des + I(des^2))
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)    
## 1    114                         
## 2    110  4 19.381  0.0006613 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.

pred <- with(capdesfo, expand.grid(est = levels(est),
                                   des = seq(0, 1, by = 0.025)))
X <- model.matrix(formula(m0)[-2], data = pred)
pred <- list(pois = pred, quasi = pred, pgen = pred)

# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
               interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])

pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, est, des, modelo)

key <- list(lines = list(lty = 1),
            text = list(c("Poisson", "Quasi-Poisson",
                          "Poisson Generelizada")))
key$lines$col <-
    trellis.par.get("superpose.line")$col[1:nlevels(pred$modelo)]

p2 <- xyplot(fit ~ des | est, data = pred, groups = modelo,
             layout = c(NA, 1), as.table = TRUE,
             xlim = extendrange(range(pred$des), f = 0.1),
             type = "l", key = key,
             ly = pred$lwr, uy = pred$upr,
             cty = "bands", alpha = 0.35,
             prepanel = prepanel.cbH,
             panel.groups = panel.cbH,
             panel = panel.superpose)
# p2
update(p1, type = "p", layout = c(NA, 1),
       key = key, spread = 0.07) +
    as.layer(p2, under = TRUE)

Número de Nematóides em Linhagens de Feijão

#-----------------------------------------------------------------------

data(nematoide, package = "MRDCr")
str(nematoide)
## 'data.frame':    94 obs. of  5 variables:
##  $ cult: Factor w/ 19 levels "A","B","C","D",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ mfr : num  10.52 11.03 6.42 8.16 4.48 ...
##  $ vol : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ nema: int  4 5 3 4 3 2 2 2 2 2 ...
##  $ off : num  0.263 0.276 0.161 0.204 0.112 ...
# Número de nematóides por grama de raíz.
plot(nema ~ off, data = nematoide)

# Média do número de nematóides por grama de raíz.
mv <- aggregate(cbind(y = nema/off) ~ cult, data = nematoide,
                FUN = function(x) c(m = mean(x), v = var(x)))

xyplot(y[, "v"] ~ y[, "m"], data = mv,
       xlab = "Média amostral",
       ylab = "Variância amostral") +
    layer(panel.abline(a = 0, b = 1, lty = 2))

#-----------------------------------------------------------------------
# Ajuste do Poisson.

m0 <- glm(nema ~ offset(log(off)) + cult,
          data = nematoide,
          family = poisson)
m1 <- update(m0, family = quasipoisson)

# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)

# Estimativas dos parâmetros.
summary(m1)
## 
## Call:
## glm(formula = nema ~ offset(log(off)) + cult, family = quasipoisson, 
##     data = nematoide)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.9455  -0.6693   0.1100   1.1519   4.7603  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9293     0.4496   6.515 7.42e-09 ***
## cultB         0.1662     0.7657   0.217 0.828703    
## cultC         0.3417     0.6769   0.505 0.615154    
## cultD         0.9128     0.6359   1.436 0.155278    
## cultE         0.9692     0.6076   1.595 0.114879    
## cultF         0.3973     0.6903   0.576 0.566621    
## cultG        -0.4507     0.7931  -0.568 0.571522    
## cultH         0.3218     0.8665   0.371 0.711433    
## cultI         0.8808     0.6018   1.464 0.147507    
## cultJ         1.1982     0.6359   1.884 0.063390 .  
## cultK         1.4511     0.5965   2.433 0.017369 *  
## cultL         1.4299     0.5362   2.667 0.009381 ** 
## cultM         1.6138     0.5507   2.931 0.004482 ** 
## cultN         1.7743     0.4958   3.579 0.000610 ***
## cultO         1.5776     0.5268   2.995 0.003718 ** 
## cultP         1.6719     0.5103   3.277 0.001593 ** 
## cultQ         2.2105     0.4913   4.499 2.45e-05 ***
## cultR         2.2080     0.5103   4.327 4.60e-05 ***
## cultS         1.9155     0.4958   3.863 0.000236 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 3.841169)
## 
##     Null deviance: 556.73  on 93  degrees of freedom
## Residual deviance: 212.54  on 75  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6
# Quadro de deviance.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: nema
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev     F    Pr(>F)    
## NULL                    93     556.73                    
## cult 18   344.19        75     212.54 4.978 3.475e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Ajuste da Poisson Generalizada.

L <- with(nematoide,
          list(y = nema, offset = off, X = model.matrix(m0)))

start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)

# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
           fixed = list(alpha = 0), vecpar = TRUE)

c(logLik(m2), logLik(m0))
## [1] -274.5007 -274.5007
# Poisson Generalizada.
m3 <- pgnz(formula(m0), data = nematoide)

# Diferença de deviance.
# 2 * diff(c(logLik(m0), logLik(m3)))
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+cultB+cultC+cultD+
##           cultE+cultF+cultG+cultH+cultI+cultJ+cultK+cultL+
##           cultM+cultN+cultO+cultP+cultQ+cultR+cultS
## Model 2: m2, [llpgnz]: (Intercept)+cultB+cultC+cultD+cultE+
##           cultF+cultG+cultH+cultI+cultJ+cultK+cultL+cultM+
##           cultN+cultO+cultP+cultQ+cultR+cultS
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     20   497.89                         
## 2     19   549.00 51.112  1  8.725e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perfil de log-verossimilhança para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))

# Covariância.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
               diag = "l", tl.pos = "lt", tl.col = "black",
               tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])

dev.off()
## null device 
##           1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
##        sum       mean        max 
## 1.06647709 0.05613037 0.16898969
# Gráfico das estimativas.
pars <- data.frame(Pois = c(0, coef(m0)), PGen = coef(m3))
xyplot(PGen ~ Pois, data = pars, aspect = "iso", grid = TRUE) +
    layer(panel.abline(a = 0, b = 1, lty = 2))

#-----------------------------------------------------------------------

X <- model.matrix(m0)

# # Predito do número de nematóides observado (considera o offset).
# with(nematoide, {
#     cbind(y = nema,
#           Pois = nematoide$off * exp(X %*% coef(m0)),
#           PGen = nematoide$off * exp(X %*% coef(m1)[-1]))
# })

# Predito do número de nematóides por grama de raíz.
pred <- with(nematoide, {
    data.frame(y = nema/off,
               Pois = c(exp(X %*% coef(m0))),
               PGen = c(exp(X %*% coef(m3)[-1])))
})
str(pred)
## 'data.frame':    94 obs. of  3 variables:
##  $ y   : num  15.2 18.1 18.7 19.6 26.8 ...
##  $ Pois: num  18.7 18.7 18.7 18.7 18.7 ...
##  $ PGen: num  19 19 19 19 19 ...
splom(pred) + layer(panel.abline(a = 0, b = 1))

# Correlação predito x observado.
cor(pred)
##              y      Pois      PGen
## y    1.0000000 0.6157455 0.6455661
## Pois 0.6157455 1.0000000 0.9497288
## PGen 0.6455661 0.9497288 1.0000000
# Média das observações de das estimativas por cultivar.
predm <- aggregate(as.matrix(pred) ~ cult, data = nematoide, FUN = mean)
cor(predm[, -1])
##              y      Pois      PGen
## y    1.0000000 0.9374510 0.9824583
## Pois 0.9374510 1.0000000 0.9500745
## PGen 0.9824583 0.9500745 1.0000000
#-----------------------------------------------------------------------
# Predição com intervalos de confiança.

pred <- unique(subset(nematoide, select = cult))
X <- model.matrix(~cult, data = pred)

pred <- list(pois = pred, quasi = pred, pgen = pred)

# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
               calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
               interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])

pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, cult, modelo)

key <- list(type = "o", divide = 1,
            lines = list(pch = 1:nlevels(pred$modelo),
                         lty = 1, col = 1),
            text = list(c("Poisson", "Quasi-Poisson",
                          "Poisson Generelizada")))

xyplot(nema/off ~ cult, data = nematoide,
       key = key,
       xlab = "Cultivar de feijão",
       ylab = "Número de nematóides por grama de raíz") +
    as.layer(
        xyplot(fit ~ cult, data = pred,
               pch = pred$modelo,
               ly = pred$lwr, uy = pred$upr,
               cty = "bars", length = 0,
               prepanel = prepanel.cbH,
               desloc = 0.25 * scale(as.integer(pred$modelo),
                                    scale = FALSE),
               panel = panel.cbH))

#-----------------------------------------------------------------------
# Resíduos de Pearson.

X <- model.matrix(m0)

# # Resíduos de Pearson no Poisson.
# with(nematoide,  {
#     y <- nema
#     # haty <- fitted(m0)
#     haty <- nematoide$off * exp(X %*% coef(m0))
#     sdy <- sqrt(haty)
#     cbind((y - haty)/sdy,
#           residuals(m0, type = "pearson"))
# })

# Resíduos de Pearson do Poisson Generalizado.
rp <- with(nematoide,  {
    y <- nema
    alph <- coef(m3)["alpha"]
    haty <- c(nematoide$off * exp(X %*% coef(m3)[-1]))
    sdy <- sqrt(haty) * (1 + alph * haty)
    (y - haty)/sdy
})

rp <- stack(data.frame(Pois = residuals(m0, type = "pearson"),
                       PGen = rp))

qqmath(~values | ind, data = rp,
       xlab = "Quantis teóricos",
       ylab = "Resíduos de Pearson",
       panel = function(...) {
           panel.qqmathline(...)
           panel.qqmath(...)
       })

Informações da sessão

## Atualizado em 21 de julho de 2016.
## 
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04 LTS
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets 
## [7] methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.4          corrplot_0.77       effects_3.1-1      
##  [4] hnp_1.2             sandwich_2.3-4      car_2.1-2          
##  [7] boot_1.3-18         lmtest_0.9-34       zoo_1.7-13         
## [10] MRDCr_0.0-2         multcomp_1.4-5      TH.data_1.0-7      
## [13] MASS_7.3-45         survival_2.39-4     mvtnorm_1.0-5      
## [16] doBy_4.5-15         latticeExtra_0.6-28 RColorBrewer_1.1-2 
## [19] lattice_0.20-33     knitr_1.13          bbmle_1.0.18       
## [22] devtools_1.11.1     gsubfn_0.6-6        proto_0.3-10       
## 
## loaded via a namespace (and not attached):
##  [1] splines_3.3.0      tcltk_3.3.0        colorspace_1.2-6  
##  [4] testthat_1.0.2     htmltools_0.3.5    yaml_2.1.13       
##  [7] mgcv_1.8-12        nloptr_1.0.4       withr_1.0.1       
## [10] stringr_1.0.0      MatrixModels_0.4-1 codetools_0.2-14  
## [13] memoise_1.0.0      evaluate_0.9       SparseM_1.7       
## [16] quantreg_5.26      pbkrtest_0.4-6     parallel_3.3.0    
## [19] highr_0.6          Rcpp_0.12.3        formatR_1.4       
## [22] lme4_1.1-12        digest_0.6.9       stringi_1.1.1     
## [25] numDeriv_2014.2-1  grid_3.3.0         tools_3.3.0       
## [28] magrittr_1.5       crayon_1.3.1       Matrix_1.2-6      
## [31] minqa_1.2.4        rmarkdown_0.9.6    roxygen2_5.0.1    
## [34] R6_2.1.2           nnet_7.3-12        nlme_3.1-128      
## [37] compiler_3.3.0