Gamma-Count

##----------------------------------------------------------------------
## Carrega pacotes.

library(lattice)
library(latticeExtra)
library(plyr)
library(bbmle)
library(corrplot)
library(car)
library(multcomp)
library(doBy)
##----------------------------------------------------------------------
## Função de probabilidade da Gamma-Count.

dgcount <- function(y, alpha, beta, offset=1){
    pgamma(offset, alpha*y, alpha*beta)-
        pgamma(offset, alpha*y+alpha, alpha*beta)
}

y <- 0:30
pP <- dpois(y, lambda=5)
pGC <- dgcount(y, alpha=1, beta=5)

plot(pP~y, type="h")
points(pGC~c(y+0.2), type="h", col=2)
##----------------------------------------------------------------------
## Explorando a distribuição de probabilidades.

library(rpanel)

draw <- function(input){
    with(input,
    {
        ebeta <- exp(beta)
        ealpha <- exp(alpha)
        pP <- dpois(y, lambda=ebeta*off)
        pGC <- dgcount(y, alpha=ealpha, beta=ebeta, offset=off)
        plot(pP~y, type="h", ylim=c(0, max(c(pGC, pP))))
        points(pGC~c(y+0.2), type="h", col=2)
        abline(v=ebeta, lty=2)
        m <- sum(y*pGC)
        abline(v=ebeta*off, lty=2)
        abline(v=m, col=2, lty=2)
    })
    return(input)
}

panel <- rp.control(y=0:100)
rp.slider(panel, variable=alpha, from=-5, to=5, initval=0,
          showvalue=TRUE, resolution=0.1, action=draw)
rp.slider(panel, variable=beta, from=-5, to=8, initval=3,
          showvalue=TRUE, resolution=0.1, action=draw)
rp.doublebutton(panel, variable=off, range=c(0, 3), initval=1,
                showvalue=TRUE, step=0.1, action=draw)

Reprodução de nematóides em cultivo de soja

Nesse conjunto de dados, a contagem tem um offset que precisa ser considerado.

Análise exploratória

##----------------------------------------------------------------------
## Carrega os dados.

nematodeSoybean1 <- read.table(
    "~/GitLab/legTools/data-raw/nematodeSoybean1.csv",
    header=TRUE, sep=";")
nematodeSoybean1$count <- with(nematodeSoybean1, count1+count2)
str(nematodeSoybean1)
## 'data.frame':    60 obs. of  15 variables:
##  $ inipop : num  0 0 0 0 0 0.0625 0.0625 0.0625 0.0625 0.0625 ...
##  $ rept   : int  21 22 23 24 25 21 22 23 24 25 ...
##  $ das    : int  30 30 30 30 30 30 30 30 30 30 ...
##  $ upfm   : num  191 168 167 207 223 ...
##  $ updm   : num  75.3 69.9 73.7 85 88.9 ...
##  $ rfm    : num  92.9 94.7 145.9 121.4 133.6 ...
##  $ count1 : int  NA NA NA NA NA 61 101 97 72 84 ...
##  $ count2 : int  NA NA NA NA NA 74 94 86 81 77 ...
##  $ volume : int  NA NA NA NA NA 80 100 100 100 100 ...
##  $ finpop : int  NA NA NA NA NA 10800 19500 18300 15300 16100 ...
##  $ rf     : num  NA NA NA NA NA ...
##  $ nemaden: num  NA NA NA NA NA ...
##  $ gweight: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ yield  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ count  : int  NA NA NA NA NA 135 195 183 153 161 ...
##----------------------------------------------------------------------

xyplot(count1+count2~log2(inipop)|factor(das),
       data=nematodeSoybean1,
       type=c("p", "smooth"), jitter.x=TRUE,
       ylab=expression(Nematodes~by~unit~volume),
       xlab=expression(Initial~nematode~density))

xyplot(count~log2(inipop)|factor(das), data=nematodeSoybean1,
       type=c("p", "smooth"), jitter.x=TRUE,
       ylab=expression(Nematodes~by~unit~volume),
       xlab=expression(Initial~nematode~density))

xyplot(count/(2*volume)~log2(inipop)|factor(das), data=nematodeSoybean1,
       type=c("p", "smooth"), jitter.x=TRUE,
       ylab=expression(Nematodes~by~unit~volume),
       xlab=expression(Initial~nematode~density))

##----------------------------------------------------------------------
## Relação média-variância.

da <- subset(nematodeSoybean1, inipop>0)

mv <- ddply(.data=da,
            .variables=.(inipop, das),
            .fun=summarise,
            m=mean(count, na.rm=TRUE),
            v=var(count, na.rm=TRUE))

xyplot(v~m, data=mv, scales="free", type=c("p", "r"))+
    layer(panel.abline(a=0, b=1, lty=2))

Inclusão de offset no modelo Gamma-Count

##----------------------------------------------------------------------
## Simulando um conjunto de dados para ver como acomodar o offset na
## Gamma-Count.

y <- rpois(100, lambda=5) ## Contagem.
o <- rep(2, length(y))    ## Offset.

n0 <- glm(y~1, family=poisson)
## predict(n0, type="response"); exp(coef(n0))
## logLik(n0)

n1 <- glm(y~offset(log(o))+1, family=poisson)
## predict(n1, type="response"); exp(coef(n1))*o
## logLik(n1)

ll <- function(theta, y, X, offset){
    ## theta: parameter vector;
    ##   theta[1]: dispersion parameter;
    ##   theta[-1]: location parameters;
    ## y: response vector (counts);
    ## X: model matrix;
    ## offset: upper limit of the intergral;
    eXb <- exp(X%*%theta[-1])
    alpha <- exp(theta[1])
    alpha.eXb <- alpha*eXb
    alpha.y <- alpha*y
    ## returns the log-likelihood.
    -sum(log(
         pgamma(offset, alpha.y, alpha.eXb)-
             pgamma(offset, alpha.y+alpha, alpha.eXb)
     ))
}

start <- c(alpha=1, lambda=coef(n1))
parnames(ll) <- names(start)
L <- list(y=y, offset=o, X=cbind(rep(1, length(y))))

n2 <- mle2(ll, start=start, fixed=list(alpha=0),
           vecpar=TRUE, data=L)

n3 <- mle2(ll, start=start,
           vecpar=TRUE, data=L)

## Log-verossimilhaça.
cbind(logLik(n0), logLik(n1), logLik(n2), logLik(n3))
##           [,1]      [,2]      [,3]      [,4]
## [1,] -217.0543 -217.0543 -217.0543 -216.9172
## Estimativas dos parâmetros.
cbind(coef(n0), coef(n1), coef(n2)[-1], coef(n3)[-1])
##                [,1]      [,2]      [,3]     [,4]
## (Intercept) 1.60543 0.9122827 0.9122827 0.920382

Ajuste dos modelos

##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.

da <- transform(da, IP=factor(inipop), DAS=factor(das))

## Modelo Poisson para ter referência.
m0 <- glm(count~offset(log(volume))+IP*DAS, data=da, family=poisson)
summary(m0)
## 
## Call:
## glm(formula = count ~ offset(log(volume)) + IP * DAS, family = poisson, 
##     data = da)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.7014  -1.9945   0.0103   1.2140   9.0435  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.54402    0.03477  15.645  < 2e-16 ***
## IP0.125         0.55925    0.04328  12.923  < 2e-16 ***
## IP0.25         -0.22355    0.05053  -4.424 9.69e-06 ***
## IP0.5           0.06575    0.04792   1.372    0.170    
## IP1            -0.85824    0.06045 -14.198  < 2e-16 ***
## DAS110         -1.83814    0.08659 -21.227  < 2e-16 ***
## IP0.125:DAS110 -1.01243    0.12984  -7.798 6.30e-15 ***
## IP0.25:DAS110  -0.04809    0.13187  -0.365    0.715    
## IP0.5:DAS110   -0.67992    0.14079  -4.829 1.37e-06 ***
## IP1:DAS110      0.11214    0.15088   0.743    0.457    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5375.55  on 49  degrees of freedom
## Residual deviance:  486.87  on 40  degrees of freedom
## AIC: 800.72
## 
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: count
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                      49     5375.6              
## IP      4    662.9        45     4712.6 < 2.2e-16 ***
## DAS     1   4124.8        44      587.8 < 2.2e-16 ***
## IP:DAS  4    100.9        40      486.9 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- glm(count~offset(log(volume))+IP*DAS,
          data=da, family=quasipoisson)
summary(m1)
## 
## Call:
## glm(formula = count ~ offset(log(volume)) + IP * DAS, family = quasipoisson, 
##     data = da)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.7014  -1.9945   0.0103   1.2140   9.0435  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.54402    0.12509   4.349 9.16e-05 ***
## IP0.125         0.55925    0.15567   3.592 0.000887 ***
## IP0.25         -0.22355    0.18177  -1.230 0.225947    
## IP0.5           0.06575    0.17238   0.381 0.704912    
## IP1            -0.85824    0.21745  -3.947 0.000312 ***
## DAS110         -1.83814    0.31150  -5.901 6.51e-07 ***
## IP0.125:DAS110 -1.01243    0.46706  -2.168 0.036192 *  
## IP0.25:DAS110  -0.04809    0.47437  -0.101 0.919754    
## IP0.5:DAS110   -0.67992    0.50646  -1.343 0.187003    
## IP1:DAS110      0.11214    0.54275   0.207 0.837353    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 12.94054)
## 
##     Null deviance: 5375.55  on 49  degrees of freedom
## Residual deviance:  486.87  on 40  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: count
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev        F    Pr(>F)    
## NULL                      49     5375.6                       
## IP      4    662.9        45     4712.6  12.8076 8.431e-07 ***
## DAS     1   4124.8        44      587.8 318.7506 < 2.2e-16 ***
## IP:DAS  4    100.9        40      486.9   1.9499    0.1209    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)

cbind(coef(m0), coef(m1))
##                       [,1]        [,2]
## (Intercept)     0.54401859  0.54401859
## IP0.125         0.55924951  0.55924951
## IP0.25         -0.22354670 -0.22354670
## IP0.5           0.06574698  0.06574698
## IP1            -0.85824022 -0.85824022
## DAS110         -1.83814249 -1.83814249
## IP0.125:DAS110 -1.01243332 -1.01243332
## IP0.25:DAS110  -0.04809225 -0.04809225
## IP0.5:DAS110   -0.67991637 -0.67991637
## IP1:DAS110      0.11214329  0.11214329
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.

start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)

## y <- with(da, count/vol)
y <- with(da, count/volume)
X <- model.matrix(~IP*DAS, data=da)

L <- with(da,
          list(y=count, offset=volume,
               X=model.matrix(~IP*DAS)))

m2 <- mle2(ll, start=start, fixed=list(alpha=0),
           data=L, vecpar=TRUE)
## Warning in mle2(ll, start = start, fixed = list(alpha = 0), data =
## L, vecpar = TRUE): couldn't invert Hessian
m3 <- mle2(ll, start=start,
           data=L, vecpar=TRUE)

Comparação dos ajustes

##----------------------------------------------------------------------

## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m2), logLik(m3))
##           [,1] [,2]      [,3]      [,4]
## [1,] -390.3592   NA -390.3366 -227.2373
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m2)[-1], coef(m3)[-1])
##                       [,1]        [,2]        [,3]        [,4]
## (Intercept)     0.54401859  0.54401859  0.54393739  0.51310439
## IP0.125         0.55924951  0.55924951  0.55964189  0.57308855
## IP0.25         -0.22354670 -0.22354670 -0.22366867 -0.22717158
## IP0.5           0.06574698  0.06574698  0.06560404  0.06860458
## IP1            -0.85824022 -0.85824022 -0.85830921 -0.89167062
## DAS110         -1.83814249 -1.83814249 -1.83823781 -1.97733430
## IP0.125:DAS110 -1.01243332 -1.01243332 -1.01246587 -1.09033430
## IP0.25:DAS110  -0.04809225 -0.04809225 -0.04810883 -0.10442588
## IP0.5:DAS110   -0.67991637 -0.67991637 -0.67992897 -0.82075570
## IP1:DAS110      0.11214329  0.11214329  0.11213224 -0.04105420
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
##      alpha 
## 0.08983008
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))

## Teste para o parâmetro de dispersão.
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [ll]: alpha+(Intercept)+IP0.125+IP0.25+IP0.5+IP1+
##           DAS110+IP0.125:DAS110+IP0.25:DAS110+IP0.5:DAS110+
##           IP1:DAS110
## Model 2: m2, [ll]: (Intercept)+IP0.125+IP0.25+IP0.5+IP1+
##           DAS110+IP0.125:DAS110+IP0.25:DAS110+IP0.5:DAS110+
##           IP1:DAS110
##   Tot Df Deviance Chisq Df Pr(>Chisq)    
## 1     11   454.47                        
## 2     10   780.67 326.2  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste para interação (Wald).
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==3
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))

## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
##     solve(L%*%vcov(m3)%*%t(L))%*%
##     (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
##         [,1]
## [1,] 7.85688
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
                 hypothesis.matrix=L,
                 vcov.=vcov(m3),
                 coef.=coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## IP0.125:DAS110 = 0
## IP0.25:DAS110 = 0
## IP0.5:DAS110 = 0
## IP1:DAS110 = 0
## 
## Model 1: restricted model
## Model 2: count ~ offset(log(volume)) + IP * DAS
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1     44                       
## 2     40  4 7.8569    0.09696 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: count
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                      49     5375.6              
## IP      4    662.9        45     4712.6 1.998e-10 ***
## DAS     1   4124.8        44      587.8 < 2.2e-16 ***
## IP:DAS  4    100.9        40      486.9    0.0992 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: count
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev        F    Pr(>F)    
## NULL                      49     5375.6                       
## IP      4    662.9        45     4712.6  12.8076 8.431e-07 ***
## DAS     1   4124.8        44      587.8 318.7506 < 2.2e-16 ***
## IP:DAS  4    100.9        40      486.9   1.9499    0.1209    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.

V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")

dev.off()
## null device 
##           1
V[1,-1]
##    (Intercept)        IP0.125         IP0.25          IP0.5 
##    0.067375821   -0.024481873    0.005491339   -0.004596027 
##            IP1         DAS110 IP0.125:DAS110  IP0.25:DAS110 
##    0.042363354    0.120976452    0.046012475    0.033451032 
##   IP0.5:DAS110     IP1:DAS110 
##    0.076770198    0.078610834
sum(abs(V[1,-1]))
## [1] 0.5001294

Predição

##----------------------------------------------------------------------
## Predição das médias com IC.

pred <- expand.grid(IP=levels(da$IP),
                    DAS=levels(da$DAS),
                    volume=100,
                    KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)

## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)

## Preditos pela Poisson.
aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
pred$pois <- cbind(pred$pois, aux)
str(pred$pois)
## 'data.frame':    10 obs. of  6 variables:
##  $ IP    : Factor w/ 5 levels "0.0625","0.125",..: 1 2 3 4 5 1 2 3 4 5
##  $ DAS   : Factor w/ 2 levels "30","110": 1 1 1 1 1 2 2 2 2 2
##  $ volume: num  100 100 100 100 100 100 100 100 100 100
##  $ lwr   : num  160.9 286.6 128.2 172.5 66.3 ...
##  $ fit   : num  172 301 138 184 73 ...
##  $ upr   : num  184.4 317 148 196.3 80.5 ...
aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
pred$quasi <- cbind(pred$quasi, aux)

alpha <- coef(m3)[1]  ## Locação.
theta <- coef(m3)[-1] ## Dispersão.

## Preditor linear.
X <- model.matrix(~IP*DAS, data=pred$cg)
pred$cg$eta <- c(X%*%theta)

## Matrix de covariância completa.
V <- vcov(m3)

## Marginal em theta.
Vm <- V[-1,-1]

## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]

max(abs(Vm-Vc))
## [1] 0.001924182
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
                  FUN=function(x){ sum(x^2) }))

## Para calcular a média de uma Gamma-Count pelo preditor linear.
eta2mean <- function(eta, offset, alpha, tol=1e-5){
    stopifnot(length(eta)==1L)
    stopifnot(length(offset)==1L)
    stopifnot(length(alpha)==1L)
    change <- 1
    S <- 0; x <- 1; p <- 1
    while (p>tol){
        p <- pgamma(offset,
                    shape=exp(alpha)*x,
                    rate=exp(alpha)*exp(eta))
        S <- S+p
        x <- x+1
    }
    return(S)
}

aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
             FUN=function(col){
                 sapply(col, FUN=eta2mean,
                        offset=pred$cg$volume[1], alpha=alpha)
             })
pred$cg <- cbind(pred$cg, aux)

##----------------------------------------------------------------------

pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, DAS, IP, modelo)
str(pred)
## 'data.frame':    30 obs. of  8 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ IP    : Factor w/ 5 levels "0.0625","0.125",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ DAS   : Factor w/ 2 levels "30","110": 1 1 1 1 1 1 1 1 1 1 ...
##  $ volume: num  100 100 100 100 100 100 100 100 100 100 ...
##  $ lwr   : num  161 135 138 287 251 ...
##  $ fit   : num  172 172 172 301 301 ...
##  $ upr   : num  184 220 215 317 361 ...
##  $ eta   : num  NA NA 0.513 NA NA ...
source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")

segplot(IP~lwr+upr|DAS, centers=fit, data=pred,
        draw=FALSE, scales=list(x="free"),
        panel=panel.segplot.by, groups=pred$modelo, f=0.1,
        pch=pred$modelo, layout=c(1, NA), as.table=TRUE, 
        key=list(type="o", divide=1,
                 lines=list(pch=1:nlevels(pred$modelo), lty=1, col=1),
                 text=list(c("Poisson", "Quasi-Poisson",
                             "Gamma-Count"))))


Número de vagens viáveis em soja

Nesse conjunto de dados, o efeito de bloco pode ser aleatório. Teria que implementar uma versão da Gamma-Count para acomodar termos de efeito aleatório.

Análise exploratória

##----------------------------------------------------------------------
## Carrega os dados.

db <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
                 header=TRUE, sep="\t", dec=",")
names(db)[1:2] <- c("k", "a")
db <- db[-74,c(1:3, 8, 10)]
str(db)
## 'data.frame':    74 obs. of  5 variables:
##  $ k    : int  0 30 60 120 180 0 30 60 120 180 ...
##  $ a    : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ bloco: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ts   : int  136 159 156 171 190 140 193 200 208 237 ...
##  $ nv   : int  56 62 66 68 82 63 86 94 86 97 ...
xyplot(nv~k|a, data=db)

xyplot(ts~k|a, data=db)

##----------------------------------------------------------------------
## Gráfico de média-variância não é possível pois não existem repetições
## com o mesmo preditor.

Ajuste dos modelos

##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.

db <- transform(db, A=factor(a), K=factor(k))

## Modelo Poisson para ter referência.
m0 <- glm(nv~bloco+A*K, data=db, family=poisson)
summary(m0)
## 
## Call:
## glm(formula = nv ~ bloco + A * K, family = poisson, data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9681  -0.6393  -0.0195   0.4854   2.3306  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.95369    0.06892  57.363  < 2e-16 ***
## blocoII     -0.02928    0.04091  -0.716  0.47414    
## blocoIII    -0.07265    0.04136  -1.756  0.07902 .  
## blocoIV     -0.12544    0.04194  -2.991  0.00278 ** 
## blocoV      -0.10795    0.04297  -2.512  0.01199 *  
## A50          0.13404    0.08765   1.529  0.12619    
## A62.5        0.21656    0.08602   2.518  0.01181 *  
## K30          0.27427    0.08493   3.229  0.00124 ** 
## K60          0.30797    0.08432   3.652  0.00026 ***
## K120         0.32883    0.08395   3.917 8.97e-05 ***
## K180         0.25540    0.08528   2.995  0.00275 ** 
## A50:K30      0.06322    0.11557   0.547  0.58433    
## A62.5:K30   -0.10747    0.11536  -0.932  0.35152    
## A50:K60      0.16561    0.11370   1.457  0.14521    
## A62.5:K60    0.10735    0.11220   0.957  0.33869    
## A50:K120     0.14920    0.11338   1.316  0.18818    
## A62.5:K120   0.11839    0.11404   1.038  0.29922    
## A50:K180     0.30370    0.11361   2.673  0.00751 ** 
## A62.5:K180   0.19838    0.11256   1.762  0.07800 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 322.684  on 73  degrees of freedom
## Residual deviance:  65.278  on 55  degrees of freedom
## AIC: 557.23
## 
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: nv
## 
## Terms added sequentially (first to last)
## 
## 
##       Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                     73     322.68              
## bloco  4   14.284        69     308.40  0.006442 ** 
## A      2   92.911        67     215.49 < 2.2e-16 ***
## K      4  136.060        63      79.43 < 2.2e-16 ***
## A:K    8   14.150        55      65.28  0.077926 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
## 
## Call:
## glm(formula = nv ~ bloco + A * K, family = quasipoisson, data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9681  -0.6393  -0.0195   0.4854   2.3306  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.95369    0.07547  52.389  < 2e-16 ***
## blocoII     -0.02928    0.04479  -0.654 0.516036    
## blocoIII    -0.07265    0.04529  -1.604 0.114420    
## blocoIV     -0.12544    0.04592  -2.732 0.008457 ** 
## blocoV      -0.10795    0.04705  -2.294 0.025606 *  
## A50          0.13404    0.09597   1.397 0.168117    
## A62.5        0.21656    0.09418   2.299 0.025303 *  
## K30          0.27427    0.09300   2.949 0.004670 ** 
## K60          0.30797    0.09233   3.336 0.001530 ** 
## K120         0.32883    0.09192   3.577 0.000734 ***
## K180         0.25540    0.09338   2.735 0.008376 ** 
## A50:K30      0.06322    0.12654   0.500 0.619324    
## A62.5:K30   -0.10747    0.12631  -0.851 0.398532    
## A50:K60      0.16561    0.12449   1.330 0.188897    
## A62.5:K60    0.10735    0.12286   0.874 0.386028    
## A50:K120     0.14920    0.12414   1.202 0.234563    
## A62.5:K120   0.11839    0.12487   0.948 0.347229    
## A50:K180     0.30370    0.12439   2.441 0.017873 *  
## A62.5:K180   0.19838    0.12325   1.610 0.113208    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 1.198902)
## 
##     Null deviance: 322.684  on 73  degrees of freedom
## Residual deviance:  65.278  on 55  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: nv
## 
## Terms added sequentially (first to last)
## 
## 
##       Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                     73     322.68                      
## bloco  4   14.284        69     308.40  2.9785   0.02684 *  
## A      2   92.911        67     215.49 38.7485 3.157e-11 ***
## K      4  136.060        63      79.43 28.3719 8.316e-13 ***
## A:K    8   14.150        55      65.28  1.4754   0.18750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)

cbind(coef(m0), coef(m1))
##                    [,1]        [,2]
## (Intercept)  3.95368724  3.95368724
## blocoII     -0.02927854 -0.02927854
## blocoIII    -0.07265048 -0.07265048
## blocoIV     -0.12543798 -0.12543798
## blocoV      -0.10794857 -0.10794857
## A50          0.13404356  0.13404356
## A62.5        0.21656458  0.21656458
## K30          0.27427290  0.27427290
## K60          0.30796674  0.30796674
## K120         0.32883188  0.32883188
## K180         0.25540441  0.25540441
## A50:K30      0.06322288  0.06322288
## A62.5:K30   -0.10747272 -0.10747272
## A50:K60      0.16561471  0.16561471
## A62.5:K60    0.10735066  0.10735066
## A50:K120     0.14920392  0.14920392
## A62.5:K120   0.11838578  0.11838578
## A50:K180     0.30369921  0.30369921
## A62.5:K180   0.19837927  0.19837927
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.

start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)

y <- with(db, nv)
X <- model.matrix(~bloco+A*K, data=db)

L <- with(db,
          list(y=nv, offset=1,
               X=model.matrix(~bloco+A*K)))

m2 <- mle2(ll, start=start, fixed=list(alpha=0),
           data=L, vecpar=TRUE)

m3 <- mle2(ll, start=start,
           data=L, vecpar=TRUE)

Comparação dos ajustes

##----------------------------------------------------------------------

## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m2), logLik(m3))
##           [,1] [,2]      [,3]      [,4]
## [1,] -259.6165   NA -259.6165 -259.3264
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m2)[-1], coef(m3)[-1])
##                    [,1]        [,2]        [,3]        [,4]
## (Intercept)  3.95368724  3.95368724  3.95368723  3.95487435
## blocoII     -0.02927854 -0.02927854 -0.02927855 -0.02925363
## blocoIII    -0.07265048 -0.07265048 -0.07265048 -0.07259276
## blocoIV     -0.12543798 -0.12543798 -0.12543798 -0.12534177
## blocoV      -0.10794857 -0.10794857 -0.10794857 -0.10785674
## A50          0.13404356  0.13404356  0.13404356  0.13388357
## A62.5        0.21656458  0.21656458  0.21656458  0.21632039
## K30          0.27427290  0.27427290  0.27427290  0.27397250
## K60          0.30796674  0.30796674  0.30796674  0.30763762
## K120         0.32883188  0.32883188  0.32883188  0.32848139
## K180         0.25540441  0.25540441  0.25540441  0.25512492
## A50:K30      0.06322288  0.06322288  0.06322288  0.06321988
## A62.5:K30   -0.10747272 -0.10747272 -0.10747272 -0.10732539
## A50:K60      0.16561471  0.16561471  0.16561471  0.16553912
## A62.5:K60    0.10735066  0.10735066  0.10735066  0.10734196
## A50:K120     0.14920392  0.14920392  0.14920392  0.14914452
## A62.5:K120   0.11838578  0.11838578  0.11838578  0.11838048
## A50:K180     0.30369921  0.30369921  0.30369921  0.30351780
## A62.5:K180   0.19837927  0.19837927  0.19837927  0.19829595
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
##    alpha 
## 1.137415
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))

## Teste para o parâmetro de dispersão.
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [ll]: alpha+(Intercept)+blocoII+blocoIII+blocoIV+
##           blocoV+A50+A62.5+K30+K60+K120+K180+A50:K30+
##           A62.5:K30+A50:K60+A62.5:K60+A50:K120+A62.5:K120+
##           A50:K180+A62.5:K180
## Model 2: m2, [ll]: (Intercept)+blocoII+blocoIII+blocoIV+blocoV+
##           A50+A62.5+K30+K60+K120+K180+A50:K30+A62.5:K30+
##           A50:K60+A62.5:K60+A50:K120+A62.5:K120+A50:K180+
##           A62.5:K180
##   Tot Df Deviance  Chisq Df Pr(>Chisq)
## 1     20   518.65                     
## 2     19   519.23 0.5802  1     0.4462
## Teste para interação (Wald).
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==4
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))

## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
##     solve(L%*%vcov(m3)%*%t(L))%*%
##     (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
##          [,1]
## [1,] 15.96558
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
                 hypothesis.matrix=L,
                 vcov.=vcov(m3),
                 coef.=coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## A50:K30 = 0
## A62.5:K30 = 0
## A50:K60 = 0
## A62.5:K60 = 0
## A50:K120 = 0
## A62.5:K120 = 0
## A50:K180 = 0
## A62.5:K180 = 0
## 
## Model 1: restricted model
## Model 2: nv ~ bloco + A * K
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1     63                       
## 2     55  8 15.966    0.04288 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: nv
## 
## Terms added sequentially (first to last)
## 
## 
##       Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                     73     322.68             
## bloco  4   14.284        69     308.40   0.0180 *  
## A      2   92.911        67     215.49   <2e-16 ***
## K      4  136.060        63      79.43   <2e-16 ***
## A:K    8   14.150        55      65.28   0.1602    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: nv
## 
## Terms added sequentially (first to last)
## 
## 
##       Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                     73     322.68                      
## bloco  4   14.284        69     308.40  2.9785   0.02684 *  
## A      2   92.911        67     215.49 38.7485 3.157e-11 ***
## K      4  136.060        63      79.43 28.3719 8.316e-13 ***
## A:K    8   14.150        55      65.28  1.4754   0.18750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.

V <- vcov(m3)
corrplot.mixed(V, upper="ellipse", col="gray50")

dev.off()
## null device 
##           1
V[1,-1]
##   (Intercept)       blocoII      blocoIII       blocoIV 
##  2.365324e-04  5.001425e-06  1.151560e-05  1.911075e-05 
##        blocoV           A50         A62.5           K30 
##  1.837628e-05 -3.193628e-05 -4.869315e-05 -5.989025e-05 
##           K60          K120          K180       A50:K30 
## -6.558486e-05 -6.987732e-05 -5.568224e-05 -4.608388e-07 
##     A62.5:K30       A50:K60     A62.5:K60      A50:K120 
##  2.939089e-05 -1.498535e-05 -1.705038e-06 -1.175731e-05 
##    A62.5:K120      A50:K180    A62.5:K180 
## -9.755547e-07 -3.610277e-05 -1.658983e-05
sum(abs(V[1,-1]))
## [1] 0.0007341682

Predição

##----------------------------------------------------------------------
## Predição das médias com IC.

pred <- expand.grid(A=levels(db$A),
                    K=levels(db$K),
                    bloco=factor("I", levels=levels(db$bloco)),
                    KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)

## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)

## Preditor linear que considera o efeito médio dos blocos.
X <- LSmatrix(m0, effect=c("A", "K"))

## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

alpha <- coef(m3)[1]  ## Locação.
theta <- coef(m3)[-1] ## Dispersão.

## Preditor linear.
pred$cg$eta <- c(X%*%theta)

## Matrix de covariância completa.
V <- vcov(m3)

## Marginal em theta.
Vm <- V[-1,-1]

## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]

max(abs(Vm-Vc))
## [1] 2.043359e-06
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
                  FUN=function(x){ sum(x^2) }))

aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
             FUN=function(col){
                 sapply(col, FUN=eta2mean,
                        offset=1, alpha=alpha)
             })
pred$cg <- cbind(pred$cg, aux)

##----------------------------------------------------------------------

pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, A, K, modelo)
str(pred)
## 'data.frame':    45 obs. of  8 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ A     : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ K     : Factor w/ 5 levels "0","30","60",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ bloco : Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ fit   : num  48.7 48.7 48.7 64.1 64.1 ...
##  $ lwr   : num  43 42.5 43.3 57.5 56.9 ...
##  $ upr   : num  55.3 55.9 54.8 71.5 72.3 ...
##  $ eta   : num  NA NA 3.89 NA NA ...
xyplot(nv~K|A, data=db, layout=c(NA, 1), as.table=TRUE,
       col=1, cex=0.7, pch=19)+
    as.layer(
        segplot(K~lwr+upr|A, centers=fit, data=pred,
                draw=FALSE, horizontal=FALSE, ## scales=list(x="free"),
                panel=panel.segplot.by, groups=pred$modelo, f=0.15,
                pch=pred$modelo, layout=c(NA, 1), as.table=TRUE, 
                key=list(type="o", divide=1,
                         lines=list(pch=1:nlevels(pred$modelo),
                                    lty=1, col=1),
                         text=list(c("Poisson", "Quasi-Poisson",
                                     "Gamma-Count"))))
    )

Ocorrência de mosca branca

http://www.leg.ufpr.br/~walmes/data/ninfas.txt

Análise exploratória

##----------------------------------------------------------------------
## Análise exploratória.

dc <- read.table("moscaBranca.txt", header=TRUE, sep="\t")
dc <- transform(dc,
                data=as.Date(dc$data),
                bloc=factor(bloc),
                tot=inf+med+sup,
                off=100)
dc$aval <- with(dc, c(data-min(data)))
dc$Aval <- factor(dc$aval)
names(dc)[2:3] <- c("Vari","Bloc")
dc <- droplevels(subset(dc, grepl("BRS", x=Vari)))
str(dc)
## 'data.frame':    96 obs. of  10 variables:
##  $ data: Date, format: "2009-12-11" ...
##  $ Vari: Factor w/ 4 levels "BRS 239","BRS 243 RR",..: 3 3 3 3 2 2 2 2 4 4 ...
##  $ Bloc: Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
##  $ sup : int  168 30 28 6 31 4 3 48 100 54 ...
##  $ med : int  71 46 36 1 22 6 12 76 50 15 ...
##  $ inf : int  39 22 7 31 28 10 27 13 34 7 ...
##  $ tot : int  278 98 71 38 81 20 42 137 184 76 ...
##  $ off : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ aval: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Aval: Factor w/ 6 levels "0","8","13","22",..: 1 1 1 1 1 1 1 1 1 1 ...
## xyplot(sup+med+inf~data, groups=Vari, outer=TRUE, data=dc)
xyplot(tot~data|Vari, data=dc)

Ajuste dos modelos

##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.

## IMPORTANT: Foi usado um offset de 100 por problemas de
## over/underflow. O problema não é com a Poisson mas com a
## Gamma-Count. Todas as análises consideram um offset de 100, portanto,
## permanecem comparáveis e igualmente interpretáveis.

## Modelo Poisson para ter referência.
m0 <- glm(tot~offset(log(off))+Bloc+Vari*Aval,
          data=dc, family=poisson)
summary(m0)
## 
## Call:
## glm(formula = tot ~ offset(log(off)) + Bloc + Vari * Aval, family = poisson, 
##     data = dc)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -12.9723   -2.7083   -0.9217    1.8305   11.3713  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.616222   0.049692  12.401  < 2e-16 ***
## Bloc2                 -0.520193   0.032281 -16.114  < 2e-16 ***
## Bloc3                 -0.812680   0.035555 -22.857  < 2e-16 ***
## Bloc4                 -0.993601   0.037919 -26.204  < 2e-16 ***
## VariBRS 243 RR        -0.465529   0.076247  -6.106 1.02e-09 ***
## VariBRS 245 RR         0.083830   0.065605   1.278 0.201320    
## VariBRS 246 RR        -0.250994   0.071582  -3.506 0.000454 ***
## Aval8                  0.289922   0.062610   4.631 3.65e-06 ***
## Aval13                 0.423243   0.060915   6.948 3.70e-12 ***
## Aval22                -0.711247   0.082513  -8.620  < 2e-16 ***
## Aval31                -2.881443   0.205529 -14.020  < 2e-16 ***
## Aval38                -3.155880   0.234176 -13.477  < 2e-16 ***
## VariBRS 243 RR:Aval8  -0.204400   0.103779  -1.970 0.048889 *  
## VariBRS 245 RR:Aval8  -0.243589   0.089165  -2.732 0.006297 ** 
## VariBRS 246 RR:Aval8   0.067329   0.093904   0.717 0.473373    
## VariBRS 243 RR:Aval13 -0.536572   0.106217  -5.052 4.38e-07 ***
## VariBRS 245 RR:Aval13 -0.489295   0.089303  -5.479 4.28e-08 ***
## VariBRS 246 RR:Aval13  0.005953   0.092025   0.065 0.948419    
## VariBRS 243 RR:Aval22 -0.136051   0.136796  -0.995 0.319954    
## VariBRS 245 RR:Aval22 -0.620871   0.129162  -4.807 1.53e-06 ***
## VariBRS 246 RR:Aval22 -0.094652   0.127050  -0.745 0.456272    
## VariBRS 243 RR:Aval31  0.137025   0.318385   0.430 0.666922    
## VariBRS 245 RR:Aval31  0.334880   0.265744   1.260 0.207611    
## VariBRS 246 RR:Aval31  0.364323   0.284321   1.281 0.200060    
## VariBRS 243 RR:Aval38  0.160147   0.360325   0.444 0.656716    
## VariBRS 245 RR:Aval38  0.555250   0.290995   1.908 0.056377 .  
## VariBRS 246 RR:Aval38  0.196927   0.336557   0.585 0.558466    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 7106.3  on 95  degrees of freedom
## Residual deviance: 1283.8  on 69  degrees of freedom
## AIC: 1812.5
## 
## Number of Fisher Scoring iterations: 5
anova(m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: tot
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                         95     7106.3              
## Bloc       3    948.6        92     6157.7 < 2.2e-16 ***
## Vari       3    354.2        89     5803.5 < 2.2e-16 ***
## Aval       5   4432.2        84     1371.3 < 2.2e-16 ***
## Vari:Aval 15     87.5        69     1283.8 2.897e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
## 
## Call:
## glm(formula = tot ~ offset(log(off)) + Bloc + Vari * Aval, family = quasipoisson, 
##     data = dc)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -12.9723   -2.7083   -0.9217    1.8305   11.3713  
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.616222   0.216765   2.843 0.005877 ** 
## Bloc2                 -0.520193   0.140816  -3.694 0.000438 ***
## Bloc3                 -0.812680   0.155096  -5.240 1.66e-06 ***
## Bloc4                 -0.993601   0.165406  -6.007 7.93e-08 ***
## VariBRS 243 RR        -0.465529   0.332600  -1.400 0.166092    
## VariBRS 245 RR         0.083830   0.286178   0.293 0.770455    
## VariBRS 246 RR        -0.250994   0.312251  -0.804 0.424261    
## Aval8                  0.289922   0.273114   1.062 0.292145    
## Aval13                 0.423243   0.265718   1.593 0.115770    
## Aval22                -0.711247   0.359933  -1.976 0.052148 .  
## Aval31                -2.881443   0.896547  -3.214 0.001992 ** 
## Aval38                -3.155880   1.021509  -3.089 0.002890 ** 
## VariBRS 243 RR:Aval8  -0.204400   0.452700  -0.452 0.653036    
## VariBRS 245 RR:Aval8  -0.243589   0.388950  -0.626 0.533202    
## VariBRS 246 RR:Aval8   0.067329   0.409623   0.164 0.869921    
## VariBRS 243 RR:Aval13 -0.536572   0.463333  -1.158 0.250828    
## VariBRS 245 RR:Aval13 -0.489295   0.389554  -1.256 0.213338    
## VariBRS 246 RR:Aval13  0.005953   0.401426   0.015 0.988210    
## VariBRS 243 RR:Aval22 -0.136051   0.596723  -0.228 0.820324    
## VariBRS 245 RR:Aval22 -0.620871   0.563421  -1.102 0.274305    
## VariBRS 246 RR:Aval22 -0.094652   0.554212  -0.171 0.864891    
## VariBRS 243 RR:Aval31  0.137025   1.388842   0.099 0.921693    
## VariBRS 245 RR:Aval31  0.334880   1.159213   0.289 0.773535    
## VariBRS 246 RR:Aval31  0.364323   1.240248   0.294 0.769831    
## VariBRS 243 RR:Aval38  0.160147   1.571791   0.102 0.919141    
## VariBRS 245 RR:Aval38  0.555250   1.269361   0.437 0.663170    
## VariBRS 246 RR:Aval38  0.196927   1.468109   0.134 0.893685    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 19.02829)
## 
##     Null deviance: 7106.3  on 95  degrees of freedom
## Residual deviance: 1283.8  on 69  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: tot
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                         95     7106.3                      
## Bloc       3    948.6        92     6157.7 16.6171  3.15e-08 ***
## Vari       3    354.2        89     5803.5  6.2056 0.0008548 ***
## Aval       5   4432.2        84     1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15     87.5        69     1283.8  0.3066 0.9932405    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)

anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: tot
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                         95     7106.3                      
## Bloc       3    948.6        92     6157.7 16.6171  3.15e-08 ***
## Vari       3    354.2        89     5803.5  6.2056 0.0008548 ***
## Aval       5   4432.2        84     1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15     87.5        69     1283.8  0.3066 0.9932405    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.

start <- c("alpha"=log(0.05), coef(m0))
parnames(ll) <- names(start)

y <- with(dc, tot)
X <- model.matrix(~Bloc+Vari*Aval, data=dc)

L <- with(dc, list(y=tot, offset=100, X=X))

m3 <- mle2(ll, start=start,
           data=L, vecpar=TRUE)
## summary(m3)

Comparação dos ajustes

##----------------------------------------------------------------------

## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
##           [,1] [,2]      [,3]
## [1,] -879.2287   NA -406.3823
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
##                              [,1]        [,2]        [,3]
## (Intercept)            0.61622189  0.61622189  0.58504511
## Bloc2                 -0.52019337 -0.52019337 -0.57757362
## Bloc3                 -0.81268000 -0.81268000 -0.91824130
## Bloc4                 -0.99360148 -0.99360148 -1.12689114
## VariBRS 243 RR        -0.46552935 -0.46552935 -0.52339156
## VariBRS 245 RR         0.08382994  0.08382994  0.08711104
## VariBRS 246 RR        -0.25099417 -0.25099417 -0.28037541
## Aval8                  0.28992172  0.28992172  0.31047177
## Aval13                 0.42324335  0.42324335  0.45246233
## Aval22                -0.71124722 -0.71124722 -0.80784849
## Aval31                -2.88144313 -2.88144313 -4.65211494
## Aval38                -3.15587975 -3.15587975 -5.60370939
## VariBRS 243 RR:Aval8  -0.20439954 -0.20439954 -0.21832971
## VariBRS 245 RR:Aval8  -0.24358916 -0.24358916 -0.26078764
## VariBRS 246 RR:Aval8   0.06732943  0.06732943  0.08301183
## VariBRS 243 RR:Aval13 -0.53657204 -0.53657204 -0.58168152
## VariBRS 245 RR:Aval13 -0.48929505 -0.48929505 -0.52302729
## VariBRS 246 RR:Aval13  0.00595329  0.00595329  0.01849365
## VariBRS 243 RR:Aval22 -0.13605064 -0.13605064 -0.24862953
## VariBRS 245 RR:Aval22 -0.62087140 -0.62087140 -0.76234787
## VariBRS 246 RR:Aval22 -0.09465244 -0.09465244 -0.14619717
## VariBRS 243 RR:Aval31  0.13702528  0.13702528 -0.57602847
## VariBRS 245 RR:Aval31  0.33488040  0.33488040  1.01382755
## VariBRS 246 RR:Aval31  0.36432286  0.36432286  0.59593412
## VariBRS 243 RR:Aval38  0.16014748  0.16014748 -0.68516307
## VariBRS 245 RR:Aval38  0.55524980  0.55524980  1.80768792
## VariBRS 246 RR:Aval38  0.19692673  0.19692673  0.09671683
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
##      alpha 
## 0.05122604
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))

a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==4
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))

## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
##     solve(L%*%vcov(m3)%*%t(L))%*%
##     (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
##          [,1]
## [1,] 6.354826
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
                 hypothesis.matrix=L,
                 vcov.=vcov(m3),
                 coef.=coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## VariBRS 243 RR:Aval8 = 0
## VariBRS 245 RR:Aval8 = 0
## VariBRS 246 RR:Aval8 = 0
## VariBRS 243 RR:Aval13 = 0
## VariBRS 245 RR:Aval13 = 0
## VariBRS 246 RR:Aval13 = 0
## VariBRS 243 RR:Aval22 = 0
## VariBRS 245 RR:Aval22 = 0
## VariBRS 246 RR:Aval22 = 0
## VariBRS 243 RR:Aval31 = 0
## VariBRS 245 RR:Aval31 = 0
## VariBRS 246 RR:Aval31 = 0
## VariBRS 243 RR:Aval38 = 0
## VariBRS 245 RR:Aval38 = 0
## VariBRS 246 RR:Aval38 = 0
## 
## Model 1: restricted model
## Model 2: tot ~ offset(log(off)) + Bloc + Vari * Aval
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)
## 1     84                     
## 2     69 15 6.3548     0.9732
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: tot
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                         95     7106.3              
## Bloc       3    948.6        92     6157.7 8.594e-11 ***
## Vari       3    354.2        89     5803.5 0.0003281 ***
## Aval       5   4432.2        84     1371.3 < 2.2e-16 ***
## Vari:Aval 15     87.5        69     1283.8 0.9950153    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: tot
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev       F    Pr(>F)    
## NULL                         95     7106.3                      
## Bloc       3    948.6        92     6157.7 16.6171  3.15e-08 ***
## Vari       3    354.2        89     5803.5  6.2056 0.0008548 ***
## Aval       5   4432.2        84     1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15     87.5        69     1283.8  0.3066 0.9932405    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.

V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")

dev.off()
## null device 
##           1

Predição

##----------------------------------------------------------------------
## Predição das médias com IC.

pred <- expand.grid(Vari=levels(dc$Vari),
                    Aval=unique(dc$Aval),
                    Bloc=factor("1", levels=levels(dc$Bloc)),
                    off=100,
                    KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)

## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)

X <- LSmatrix(lm(tot~Bloc+Vari*Aval, dc), effect=c("Vari", "Aval"))

## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, 100*exp(aux))

## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, 100*exp(aux))

alpha <- coef(m3)[1]  ## Locação.
theta <- coef(m3)[-1] ## Dispersão.

## Preditor linear.
pred$cg$eta <- c(X%*%theta)

## Matrix de covariância completa.
V <- vcov(m3)

## Marginal em theta.
Vm <- V[-1,-1]

## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]

max(abs(Vm-Vc))
## [1] 0.3355968
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
                  FUN=function(x){ sum(x^2) }))

aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
             FUN=function(col){
                 sapply(col, FUN=eta2mean,
                        offset=100, alpha=alpha)
             })
pred$cg <- cbind(pred$cg, aux)

##----------------------------------------------------------------------

pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, Aval, Vari, modelo)
str(pred)
## 'data.frame':    72 obs. of  9 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ Vari  : Factor w/ 4 levels "BRS 239","BRS 243 RR",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Aval  : Factor w/ 6 levels "0","8","13","22",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bloc  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ off   : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ fit   : num  104 104 102 65 65 ...
##  $ lwr   : num  94.3 68.9 69.9 57.8 38.9 ...
##  $ upr   : num  113.6 155.5 152.4 73.1 108.5 ...
##  $ eta   : num  NA NA -0.0706 NA NA ...
## source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")

xyplot(tot~Aval|Vari, data=dc, layout=c(NA, 1), as.table=TRUE,
       col=1, cex=0.7, pch=19)+
    as.layer(
        segplot(Aval~lwr+upr|Vari, centers=fit, data=pred,
                horizontal=FALSE,
                draw=FALSE,
                panel=panel.segplot.by, groups=pred$modelo, f=0.15,
                pch=pred$modelo, layout=c(NA, 1), as.table=TRUE, 
                key=list(type="o", divide=1,
                         lines=list(pch=1:nlevels(pred$modelo),
                                    lty=1, col=1),
                         text=list(c("Poisson", "Quasi-Poisson",
                                     "Gamma-Count"))))
    )


Produção de ovos

Análise exploratória

##----------------------------------------------------------------------

dd <- read.table("ovos.txt", header=TRUE, sep="\t")
str(dd)
## 'data.frame':    2100 obs. of  7 variables:
##  $ periodo: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ box    : int  1 1 1 1 1 1 2 2 2 2 ...
##  $ luz    : Factor w/ 5 levels "amarelo","azul",..: 4 4 4 4 4 4 1 1 1 1 ...
##  $ gaiola : int  1 2 3 4 5 6 1 2 3 4 ...
##  $ dia    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ ovos   : int  9 7 9 7 9 8 9 8 6 10 ...
##  $ massa  : int  698 591 589 401 611 531 569 546 410 631 ...
ftable(xtabs(~periodo+luz+box, data=dd))
##                  box  1  2  3  4  5
## periodo luz                        
## 1       amarelo       0 84  0  0  0
##         azul          0  0 84  0  0
##         branco        0  0  0 84  0
##         verde        84  0  0  0  0
##         vermelho      0  0  0  0 84
## 2       amarelo      84  0  0  0  0
##         azul          0  0  0 84  0
##         branco        0 84  0  0  0
##         verde         0  0  0  0 84
##         vermelho      0  0 84  0  0
## 3       amarelo       0  0  0  0 84
##         azul         84  0  0  0  0
##         branco        0  0 84  0  0
##         verde         0 84  0  0  0
##         vermelho      0  0  0 84  0
## 4       amarelo       0  0 84  0  0
##         azul          0 84  0  0  0
##         branco        0  0  0  0 84
##         verde         0  0  0 84  0
##         vermelho     84  0  0  0  0
## 5       amarelo       0  0  0 84  0
##         azul          0  0  0  0 84
##         branco       84  0  0  0  0
##         verde         0  0 84  0  0
##         vermelho      0 84  0  0  0
## Cada box tem 6 gaiolas. Cada gaiola tem 10 aves.
## Modelar a produção por ave dia usando o total de por gaiola em duas
## semanas. O offset é 10*14=140.

useOuterStrips(
    xyplot(ovos~dia|luz+periodo, data=dd, jitter.x=TRUE,
           groups=gaiola,
           type=c("p", "smooth")))

useOuterStrips(
    xyplot(massa~dia|luz+periodo, data=dd, jitter.x=TRUE,
           groups=gaiola,
           type=c("p", "smooth")))

## Agregar para o total quinzenal.
dd <- aggregate(ovos~periodo+box+luz+gaiola, data=dd, FUN=sum)
str(dd)
## 'data.frame':    150 obs. of  5 variables:
##  $ periodo: int  2 1 4 5 3 3 4 1 2 5 ...
##  $ box    : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ luz    : Factor w/ 5 levels "amarelo","azul",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ gaiola : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ ovos   : int  111 115 99 118 117 107 91 113 120 115 ...
xyplot(ovos~periodo, groups=luz, data=dd, jitter.x=TRUE,
       ## groups=gaiola,
       type=c("p", "smooth"))

Ajuste dos modelos

##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.

## IMPORTANT: Foi usado um offset de 10*14=140.

dd <- transform(dd,
                off=10*14, ## 10 aves por gaiola, soma de 14 dias.
                Per=factor(periodo),
                Box=factor(box))
names(dd)[3] <- "Luz"

## WARNING: Esta sendo negligenciada a variância de gaiolas. Isso requer
## um modelo de efeitos aleatórios que ainda não dispomos.

## Modelo Poisson para ter referência.
m0 <- glm(ovos~offset(log(off))+Per+Box+Luz,
          data=dd, family=poisson)
summary(m0)
## 
## Call:
## glm(formula = ovos ~ offset(log(off)) + Per + Box + Luz, family = poisson, 
##     data = dd)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.98939  -0.70195   0.05559   0.73734   2.84660  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.293502   0.028913 -10.151  < 2e-16 ***
## Per2        -0.020631   0.024848  -0.830 0.406375    
## Per3        -0.066985   0.025143  -2.664 0.007717 ** 
## Per4        -0.145362   0.025666  -5.664 1.48e-08 ***
## Per5        -0.094955   0.025330  -3.749 0.000178 ***
## Box2         0.016591   0.025919   0.640 0.522110    
## Box3         0.017281   0.025923   0.667 0.505028    
## Box4         0.071877   0.025581   2.810 0.004957 ** 
## Box5         0.086243   0.025484   3.384 0.000714 ***
## Luzazul     -0.007520   0.025659  -0.293 0.769463    
## Luzbranco    0.005290   0.025575   0.207 0.836122    
## Luzverde     0.026120   0.025441   1.027 0.304566    
## Luzvermelho  0.003538   0.025579   0.138 0.889987    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 214.21  on 149  degrees of freedom
## Residual deviance: 153.17  on 137  degrees of freedom
## AIC: 1148.3
## 
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: ovos
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                   149     214.21              
## Per   4   41.191       145     173.02 2.454e-08 ***
## Box   4   17.905       141     155.12  0.001288 ** 
## Luz   4    1.942       137     153.17  0.746451    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
## 
## Call:
## glm(formula = ovos ~ offset(log(off)) + Per + Box + Luz, family = quasipoisson, 
##     data = dd)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.98939  -0.70195   0.05559   0.73734   2.84660  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.293502   0.030410  -9.652  < 2e-16 ***
## Per2        -0.020631   0.026135  -0.789 0.431234    
## Per3        -0.066985   0.026445  -2.533 0.012435 *  
## Per4        -0.145362   0.026995  -5.385 3.07e-07 ***
## Per5        -0.094955   0.026642  -3.564 0.000503 ***
## Box2         0.016591   0.027261   0.609 0.543808    
## Box3         0.017281   0.027266   0.634 0.527279    
## Box4         0.071877   0.026905   2.671 0.008467 ** 
## Box5         0.086243   0.026804   3.218 0.001614 ** 
## Luzazul     -0.007520   0.026988  -0.279 0.780934    
## Luzbranco    0.005290   0.026899   0.197 0.844375    
## Luzverde     0.026120   0.026758   0.976 0.330712    
## Luzvermelho  0.003538   0.026904   0.132 0.895564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 1.106239)
## 
##     Null deviance: 214.21  on 149  degrees of freedom
## Residual deviance: 153.17  on 137  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ovos
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                   149     214.21                     
## Per   4   41.191       145     173.02 9.3088 1.101e-06 ***
## Box   4   17.905       141     155.12 4.0464  0.003924 ** 
## Luz   4    1.942       137     153.17 0.4388  0.780359    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)

##----------------------------------------------------------------------
## Ajuste do Gamma-Count.

start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)

y <- with(dd, ovos)
X <- model.matrix(~Per+Box+Luz, data=dd)

L <- list(y=y, offset=140, X=X)

m3 <- mle2(ll, start=start,
           data=L, vecpar=TRUE)
## summary(m3)

Comparação dos ajustes

##----------------------------------------------------------------------

## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
##           [,1] [,2]      [,3]
## [1,] -561.1435   NA -561.1298
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
##                     [,1]         [,2]         [,3]
## (Intercept) -0.293502158 -0.293502158 -0.293628429
## Per2        -0.020631035 -0.020631035 -0.020628965
## Per3        -0.066985381 -0.066985381 -0.066953143
## Per4        -0.145362253 -0.145362253 -0.145392814
## Per5        -0.094955166 -0.094955166 -0.094939106
## Box2         0.016590669  0.016590669  0.016613475
## Box3         0.017280556  0.017280556  0.017309822
## Box4         0.071877118  0.071877118  0.071885239
## Box5         0.086243384  0.086243384  0.086288936
## Luzazul     -0.007520114 -0.007520114 -0.007514504
## Luzbranco    0.005290366  0.005290366  0.005306603
## Luzverde     0.026119937  0.026119937  0.026153428
## Luzvermelho  0.003538117  0.003538117  0.003556394
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
##     alpha 
## 0.9809765
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))

a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==3
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))

## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
##     solve(L%*%vcov(m3)%*%t(L))%*%
##     (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
##          [,1]
## [1,] 1.914755
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
                 hypothesis.matrix=L,
                 vcov.=vcov(m3),
                 coef.=coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## Luzazul = 0
## Luzbranco = 0
## Luzverde = 0
## Luzvermelho = 0
## 
## Model 1: restricted model
## Model 2: ovos ~ offset(log(off)) + Per + Box + Luz
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)
## 1    141                     
## 2    137  4 1.9148     0.7514
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ovos
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                   149     214.21              
## Per   4   41.191       145     173.02 1.611e-07 ***
## Box   4   17.905       141     155.12   0.00278 ** 
## Luz   4    1.942       137     153.17   0.78064    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ovos
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                   149     214.21                     
## Per   4   41.191       145     173.02 9.3088 1.101e-06 ***
## Box   4   17.905       141     155.12 4.0464  0.003924 ** 
## Luz   4    1.942       137     153.17 0.4388  0.780359    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.

V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")

dev.off()
## null device 
##           1

Predição

##----------------------------------------------------------------------
## Predição das médias com IC.

pred <- expand.grid(Per=factor(levels(dd$Per)[1],
                               levels=levels(dd$Per)),
                    Box=factor(levels(dd$Box)[1],
                               levels=levels(dd$Box)),
                    Luz=levels(dd$Luz),
                    off=10*14,
                    KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)

## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)

X <- LSmatrix(lm(ovos~Per+Box+Luz, dd), effect=c("Luz"))

## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, 140*exp(aux))

## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, 140*exp(aux))

alpha <- coef(m3)[1]  ## Locação.
theta <- coef(m3)[-1] ## Dispersão.

## Preditor linear.
pred$cg$eta <- c(X%*%theta)

## Matrix de covariância completa.
V <- vcov(m3)

## Marginal em theta.
Vm <- V[-1,-1]

## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]

max(abs(Vm-Vc))
## [1] 3.270846e-07
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
                  FUN=function(x){ sum(x^2) }))

aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
             FUN=function(col){
                 sapply(col, FUN=eta2mean,
                        offset=140, alpha=alpha)
             })
pred$cg <- cbind(pred$cg, aux)

##----------------------------------------------------------------------

pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, Luz, modelo)
str(pred)
## 'data.frame':    15 obs. of  9 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ Per   : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Box   : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Luz   : Factor w/ 5 levels "amarelo","azul",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ off   : num  140 140 140 140 140 140 140 140 140 140 ...
##  $ fit   : num  102 102 102 101 101 ...
##  $ lwr   : num  98 97.9 98 97.3 97.1 ...
##  $ upr   : num  105 105 105 104 105 ...
##  $ eta   : num  NA NA -0.321 NA NA ...
## source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")

segplot(Luz~lwr+upr, centers=fit, data=pred,
        horizontal=FALSE,
        draw=FALSE,
        panel=panel.segplot.by, groups=pred$modelo, f=0.15,
        pch=pred$modelo, layout=c(NA, 1), as.table=TRUE, 
        key=list(type="o", divide=1,
                 lines=list(pch=1:nlevels(pred$modelo),
                            lty=1, col=1),
                 text=list(c("Poisson", "Quasi-Poisson",
                             "Gamma-Count"))))


Capulhos no algodão em função da pressão da mosca branca

http://www.leg.ufpr.br/~walmes/data/mosca_algodao_prod.txt

Análise exploratória

##----------------------------------------------------------------------

da <- read.table(
    "http://www.leg.ufpr.br/~walmes/data/mosca_algodao_prod.txt",
    header=TRUE, sep="\t")
## da <- aggregate(ncapu~vaso+dexp, data=da, FUN=sum)
str(da)
## 'data.frame':    60 obs. of  8 variables:
##  $ dexp    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ vaso    : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ planta  : int  1 2 1 2 1 2 1 2 1 2 ...
##  $ nestrrep: int  4 4 3 5 5 5 5 5 4 5 ...
##  $ ncapu   : int  4 4 2 5 5 5 5 5 4 5 ...
##  $ alt     : num  70.5 67.5 72 70 64 64.5 67.5 61 70 70 ...
##  $ nnos    : int  13 14 14 15 15 11 14 11 14 15 ...
##  $ pesocap : num  25.2 NA 28.5 NA 31.8 ...
xyplot(ncapu~dexp, data=da, jitter.x=TRUE,
       type=c("p", "smooth"))

Ajuste dos modelos

##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.

## da <- transform(da, Dexp=factor(dexp))
da <- transform(da, dexp=dexp-mean(range(dexp)))

## WARNING: Esta sendo negligenciada a variância entre vasos. Isso
## requer um modelo de efeitos aleatórios que ainda não dispomos.

## Modelo Poisson para ter referência.
m0 <- glm(ncapu~dexp+I(dexp^2), data=da, family=poisson)
summary(m0)
## 
## Call:
## glm(formula = ncapu ~ dexp + I(dexp^2), family = poisson, data = da)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.58913  -0.33198   0.06437   0.27783   1.25187  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.23887    0.10535  11.759   <2e-16 ***
## dexp        -0.02573    0.03785  -0.680    0.497    
## I(dexp^2)    0.02870    0.02638   1.088    0.277    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 20.800  on 59  degrees of freedom
## Residual deviance: 19.133  on 57  degrees of freedom
## AIC: 214.88
## 
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: ncapu
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL                         59     20.800         
## dexp       1  0.49164        58     20.308   0.4832
## I(dexp^2)  1  1.17575        57     19.133   0.2782
## Modelo quasi Poisson para usar os valores iniciais.
m1 <- update(m0, family=quasipoisson)
summary(m1)
## 
## Call:
## glm(formula = ncapu ~ dexp + I(dexp^2), family = quasipoisson, 
##     data = da)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.58913  -0.33198   0.06437   0.27783   1.25187  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.23887    0.05991  20.677   <2e-16 ***
## dexp        -0.02573    0.02152  -1.195   0.2370    
## I(dexp^2)    0.02870    0.01500   1.913   0.0607 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.3234131)
## 
##     Null deviance: 20.800  on 59  degrees of freedom
## Residual deviance: 19.133  on 57  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ncapu
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev      F  Pr(>F)  
## NULL                         59     20.800                 
## dexp       1  0.49164        58     20.308 1.5202 0.22265  
## I(dexp^2)  1  1.17575        57     19.133 3.6355 0.06161 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)

##----------------------------------------------------------------------
## Ajuste do Gamma-Count.

start <- c("alpha"=log(2), coef(m0))
parnames(ll) <- names(start)

y <- with(da, ncapu)
X <- model.matrix(m0)

L <- list(y=y, offset=1, X=X)

m3 <- mle2(ll, start=start,
           data=L, vecpar=TRUE)
summary(m3)
## Maximum likelihood estimation
## 
## Call:
## mle2(minuslogl = ll, start = start, data = L, vecpar = TRUE)
## 
## Coefficients:
##              Estimate Std. Error z value     Pr(z)    
## alpha        1.337744   0.203356  6.5783 4.757e-11 ***
## (Intercept)  1.340756   0.053763 24.9381 < 2.2e-16 ***
## dexp        -0.023555   0.019175 -1.2284   0.21929    
## I(dexp^2)    0.026019   0.013354  1.9484   0.05136 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 178.8027

Comparação dos ajustes

##----------------------------------------------------------------------

## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
##           [,1] [,2]      [,3]
## [1,] -104.4391   NA -89.40136
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
##                    [,1]        [,2]        [,3]
## (Intercept)  1.23887158  1.23887158  1.34075603
## dexp        -0.02572514 -0.02572514 -0.02355527
## I(dexp^2)    0.02870051  0.02870051  0.02601916
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
##    alpha 
## 3.810439
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))

a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a!=0
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))

## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
##     solve(L%*%vcov(m3)%*%t(L))%*%
##     (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
##          [,1]
## [1,] 5.483212
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
                 hypothesis.matrix=L,
                 vcov.=vcov(m3),
                 coef.=coef(m3))
## Linear hypothesis test
## 
## Hypothesis:
## dexp = 0
## I(dexp^2) = 0
## 
## Model 1: restricted model
## Model 2: ncapu ~ dexp + I(dexp^2)
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1     59                       
## 2     57  2 5.4832    0.06447 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ncapu
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                         59     20.800           
## dexp       1  0.49164        58     20.308  0.21759  
## I(dexp^2)  1  1.17575        57     19.133  0.05656 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: ncapu
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev      F  Pr(>F)  
## NULL                         59     20.800                 
## dexp       1  0.49164        58     20.308 1.5202 0.22265  
## I(dexp^2)  1  1.17575        57     19.133 3.6355 0.06161 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.

V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")

dev.off()
## null device 
##           1

Predição

##----------------------------------------------------------------------
## Predição das médias com IC.

pred <- data.frame(dexp=with(da, seq(min(dexp), max(dexp), l=30)))
pred <- list(pois=pred, quasi=pred, cg=pred)

## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)

X <- model.matrix(formula(m0)[-2], data=pred$cg)

## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))

## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))

alpha <- coef(m3)[1]  ## Locação.
theta <- coef(m3)[-1] ## Dispersão.

## Preditor linear.
pred$cg$eta <- c(X%*%theta)

## Matrix de covariância completa.
V <- vcov(m3)

## Marginal em theta.
Vm <- V[-1,-1]

## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]

max(abs(Vm-Vc))
## [1] 5.565329e-05
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
                  FUN=function(x){ sum(x^2) }))

aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
             FUN=function(col){
                 sapply(col, FUN=eta2mean,
                        offset=1, alpha=alpha)
             })
pred$cg <- cbind(pred$cg, aux)

##----------------------------------------------------------------------

pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, dexp)
str(pred)
## 'data.frame':    90 obs. of  6 variables:
##  $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ dexp  : num  -2.5 -2.5 -2.5 -2.33 -2.33 ...
##  $ fit   : num  4.4 4.4 4.4 4.28 4.28 ...
##  $ lwr   : num  3.36 3.77 3.79 3.37 3.73 ...
##  $ upr   : num  5.78 5.14 5.11 5.44 4.91 ...
##  $ eta   : num  NA NA 1.56 NA NA ...
source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/prepanel.cbH.R")
source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/panel.cbH.R")

xyplot(ncapu~dexp, data=da, jitter.x=TRUE, pch=19, col=1)+
    as.layer(
        under=TRUE,
        xyplot(fit~dexp, data=pred, groups=modelo, type="l",
               ly=pred$lwr, uy=pred$upr, cty="bands",
               ## fill="gray90",
               alpha=0.5,
               prepanel=prepanel.cbH,
               panel.groups=panel.cbH,
               panel=panel.superpose)
    )


Mais dados para se considerar