##----------------------------------------------------------------------
## Carrega pacotes.
library(lattice)
library(latticeExtra)
library(plyr)
library(bbmle)
library(corrplot)
library(car)
library(multcomp)
library(doBy)
##----------------------------------------------------------------------
## Função de probabilidade da Gamma-Count.
dgcount <- function(y, alpha, beta, offset=1){
pgamma(offset, alpha*y, alpha*beta)-
pgamma(offset, alpha*y+alpha, alpha*beta)
}
y <- 0:30
pP <- dpois(y, lambda=5)
pGC <- dgcount(y, alpha=1, beta=5)
plot(pP~y, type="h")
points(pGC~c(y+0.2), type="h", col=2)
##----------------------------------------------------------------------
## Explorando a distribuição de probabilidades.
library(rpanel)
draw <- function(input){
with(input,
{
ebeta <- exp(beta)
ealpha <- exp(alpha)
pP <- dpois(y, lambda=ebeta*off)
pGC <- dgcount(y, alpha=ealpha, beta=ebeta, offset=off)
plot(pP~y, type="h", ylim=c(0, max(c(pGC, pP))))
points(pGC~c(y+0.2), type="h", col=2)
abline(v=ebeta, lty=2)
m <- sum(y*pGC)
abline(v=ebeta*off, lty=2)
abline(v=m, col=2, lty=2)
})
return(input)
}
panel <- rp.control(y=0:100)
rp.slider(panel, variable=alpha, from=-5, to=5, initval=0,
showvalue=TRUE, resolution=0.1, action=draw)
rp.slider(panel, variable=beta, from=-5, to=8, initval=3,
showvalue=TRUE, resolution=0.1, action=draw)
rp.doublebutton(panel, variable=off, range=c(0, 3), initval=1,
showvalue=TRUE, step=0.1, action=draw)
Nesse conjunto de dados, a contagem tem um offset que precisa ser considerado.
##----------------------------------------------------------------------
## Carrega os dados.
nematodeSoybean1 <- read.table(
"~/GitLab/legTools/data-raw/nematodeSoybean1.csv",
header=TRUE, sep=";")
nematodeSoybean1$count <- with(nematodeSoybean1, count1+count2)
str(nematodeSoybean1)
## 'data.frame': 60 obs. of 15 variables:
## $ inipop : num 0 0 0 0 0 0.0625 0.0625 0.0625 0.0625 0.0625 ...
## $ rept : int 21 22 23 24 25 21 22 23 24 25 ...
## $ das : int 30 30 30 30 30 30 30 30 30 30 ...
## $ upfm : num 191 168 167 207 223 ...
## $ updm : num 75.3 69.9 73.7 85 88.9 ...
## $ rfm : num 92.9 94.7 145.9 121.4 133.6 ...
## $ count1 : int NA NA NA NA NA 61 101 97 72 84 ...
## $ count2 : int NA NA NA NA NA 74 94 86 81 77 ...
## $ volume : int NA NA NA NA NA 80 100 100 100 100 ...
## $ finpop : int NA NA NA NA NA 10800 19500 18300 15300 16100 ...
## $ rf : num NA NA NA NA NA ...
## $ nemaden: num NA NA NA NA NA ...
## $ gweight: num NA NA NA NA NA NA NA NA NA NA ...
## $ yield : num NA NA NA NA NA NA NA NA NA NA ...
## $ count : int NA NA NA NA NA 135 195 183 153 161 ...
##----------------------------------------------------------------------
xyplot(count1+count2~log2(inipop)|factor(das),
data=nematodeSoybean1,
type=c("p", "smooth"), jitter.x=TRUE,
ylab=expression(Nematodes~by~unit~volume),
xlab=expression(Initial~nematode~density))
xyplot(count~log2(inipop)|factor(das), data=nematodeSoybean1,
type=c("p", "smooth"), jitter.x=TRUE,
ylab=expression(Nematodes~by~unit~volume),
xlab=expression(Initial~nematode~density))
xyplot(count/(2*volume)~log2(inipop)|factor(das), data=nematodeSoybean1,
type=c("p", "smooth"), jitter.x=TRUE,
ylab=expression(Nematodes~by~unit~volume),
xlab=expression(Initial~nematode~density))
##----------------------------------------------------------------------
## Relação média-variância.
da <- subset(nematodeSoybean1, inipop>0)
mv <- ddply(.data=da,
.variables=.(inipop, das),
.fun=summarise,
m=mean(count, na.rm=TRUE),
v=var(count, na.rm=TRUE))
xyplot(v~m, data=mv, scales="free", type=c("p", "r"))+
layer(panel.abline(a=0, b=1, lty=2))
##----------------------------------------------------------------------
## Simulando um conjunto de dados para ver como acomodar o offset na
## Gamma-Count.
y <- rpois(100, lambda=5) ## Contagem.
o <- rep(2, length(y)) ## Offset.
n0 <- glm(y~1, family=poisson)
## predict(n0, type="response"); exp(coef(n0))
## logLik(n0)
n1 <- glm(y~offset(log(o))+1, family=poisson)
## predict(n1, type="response"); exp(coef(n1))*o
## logLik(n1)
ll <- function(theta, y, X, offset){
## theta: parameter vector;
## theta[1]: dispersion parameter;
## theta[-1]: location parameters;
## y: response vector (counts);
## X: model matrix;
## offset: upper limit of the intergral;
eXb <- exp(X%*%theta[-1])
alpha <- exp(theta[1])
alpha.eXb <- alpha*eXb
alpha.y <- alpha*y
## returns the log-likelihood.
-sum(log(
pgamma(offset, alpha.y, alpha.eXb)-
pgamma(offset, alpha.y+alpha, alpha.eXb)
))
}
start <- c(alpha=1, lambda=coef(n1))
parnames(ll) <- names(start)
L <- list(y=y, offset=o, X=cbind(rep(1, length(y))))
n2 <- mle2(ll, start=start, fixed=list(alpha=0),
vecpar=TRUE, data=L)
n3 <- mle2(ll, start=start,
vecpar=TRUE, data=L)
## Log-verossimilhaça.
cbind(logLik(n0), logLik(n1), logLik(n2), logLik(n3))
## [,1] [,2] [,3] [,4]
## [1,] -217.0543 -217.0543 -217.0543 -216.9172
## Estimativas dos parâmetros.
cbind(coef(n0), coef(n1), coef(n2)[-1], coef(n3)[-1])
## [,1] [,2] [,3] [,4]
## (Intercept) 1.60543 0.9122827 0.9122827 0.920382
##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.
da <- transform(da, IP=factor(inipop), DAS=factor(das))
## Modelo Poisson para ter referência.
m0 <- glm(count~offset(log(volume))+IP*DAS, data=da, family=poisson)
summary(m0)
##
## Call:
## glm(formula = count ~ offset(log(volume)) + IP * DAS, family = poisson,
## data = da)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.7014 -1.9945 0.0103 1.2140 9.0435
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.54402 0.03477 15.645 < 2e-16 ***
## IP0.125 0.55925 0.04328 12.923 < 2e-16 ***
## IP0.25 -0.22355 0.05053 -4.424 9.69e-06 ***
## IP0.5 0.06575 0.04792 1.372 0.170
## IP1 -0.85824 0.06045 -14.198 < 2e-16 ***
## DAS110 -1.83814 0.08659 -21.227 < 2e-16 ***
## IP0.125:DAS110 -1.01243 0.12984 -7.798 6.30e-15 ***
## IP0.25:DAS110 -0.04809 0.13187 -0.365 0.715
## IP0.5:DAS110 -0.67992 0.14079 -4.829 1.37e-06 ***
## IP1:DAS110 0.11214 0.15088 0.743 0.457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 5375.55 on 49 degrees of freedom
## Residual deviance: 486.87 on 40 degrees of freedom
## AIC: 800.72
##
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: count
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 49 5375.6
## IP 4 662.9 45 4712.6 < 2.2e-16 ***
## DAS 1 4124.8 44 587.8 < 2.2e-16 ***
## IP:DAS 4 100.9 40 486.9 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- glm(count~offset(log(volume))+IP*DAS,
data=da, family=quasipoisson)
summary(m1)
##
## Call:
## glm(formula = count ~ offset(log(volume)) + IP * DAS, family = quasipoisson,
## data = da)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.7014 -1.9945 0.0103 1.2140 9.0435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.54402 0.12509 4.349 9.16e-05 ***
## IP0.125 0.55925 0.15567 3.592 0.000887 ***
## IP0.25 -0.22355 0.18177 -1.230 0.225947
## IP0.5 0.06575 0.17238 0.381 0.704912
## IP1 -0.85824 0.21745 -3.947 0.000312 ***
## DAS110 -1.83814 0.31150 -5.901 6.51e-07 ***
## IP0.125:DAS110 -1.01243 0.46706 -2.168 0.036192 *
## IP0.25:DAS110 -0.04809 0.47437 -0.101 0.919754
## IP0.5:DAS110 -0.67992 0.50646 -1.343 0.187003
## IP1:DAS110 0.11214 0.54275 0.207 0.837353
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 12.94054)
##
## Null deviance: 5375.55 on 49 degrees of freedom
## Residual deviance: 486.87 on 40 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: count
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 49 5375.6
## IP 4 662.9 45 4712.6 12.8076 8.431e-07 ***
## DAS 1 4124.8 44 587.8 318.7506 < 2.2e-16 ***
## IP:DAS 4 100.9 40 486.9 1.9499 0.1209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)
cbind(coef(m0), coef(m1))
## [,1] [,2]
## (Intercept) 0.54401859 0.54401859
## IP0.125 0.55924951 0.55924951
## IP0.25 -0.22354670 -0.22354670
## IP0.5 0.06574698 0.06574698
## IP1 -0.85824022 -0.85824022
## DAS110 -1.83814249 -1.83814249
## IP0.125:DAS110 -1.01243332 -1.01243332
## IP0.25:DAS110 -0.04809225 -0.04809225
## IP0.5:DAS110 -0.67991637 -0.67991637
## IP1:DAS110 0.11214329 0.11214329
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.
start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)
## y <- with(da, count/vol)
y <- with(da, count/volume)
X <- model.matrix(~IP*DAS, data=da)
L <- with(da,
list(y=count, offset=volume,
X=model.matrix(~IP*DAS)))
m2 <- mle2(ll, start=start, fixed=list(alpha=0),
data=L, vecpar=TRUE)
## Warning in mle2(ll, start = start, fixed = list(alpha = 0), data =
## L, vecpar = TRUE): couldn't invert Hessian
m3 <- mle2(ll, start=start,
data=L, vecpar=TRUE)
##----------------------------------------------------------------------
## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m2), logLik(m3))
## [,1] [,2] [,3] [,4]
## [1,] -390.3592 NA -390.3366 -227.2373
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m2)[-1], coef(m3)[-1])
## [,1] [,2] [,3] [,4]
## (Intercept) 0.54401859 0.54401859 0.54393739 0.51310439
## IP0.125 0.55924951 0.55924951 0.55964189 0.57308855
## IP0.25 -0.22354670 -0.22354670 -0.22366867 -0.22717158
## IP0.5 0.06574698 0.06574698 0.06560404 0.06860458
## IP1 -0.85824022 -0.85824022 -0.85830921 -0.89167062
## DAS110 -1.83814249 -1.83814249 -1.83823781 -1.97733430
## IP0.125:DAS110 -1.01243332 -1.01243332 -1.01246587 -1.09033430
## IP0.25:DAS110 -0.04809225 -0.04809225 -0.04810883 -0.10442588
## IP0.5:DAS110 -0.67991637 -0.67991637 -0.67992897 -0.82075570
## IP1:DAS110 0.11214329 0.11214329 0.11213224 -0.04105420
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
## alpha
## 0.08983008
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))
## Teste para o parâmetro de dispersão.
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [ll]: alpha+(Intercept)+IP0.125+IP0.25+IP0.5+IP1+
## DAS110+IP0.125:DAS110+IP0.25:DAS110+IP0.5:DAS110+
## IP1:DAS110
## Model 2: m2, [ll]: (Intercept)+IP0.125+IP0.25+IP0.5+IP1+
## DAS110+IP0.125:DAS110+IP0.25:DAS110+IP0.5:DAS110+
## IP1:DAS110
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 11 454.47
## 2 10 780.67 326.2 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste para interação (Wald).
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==3
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))
## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
## solve(L%*%vcov(m3)%*%t(L))%*%
## (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
## [,1]
## [1,] 7.85688
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
hypothesis.matrix=L,
vcov.=vcov(m3),
coef.=coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## IP0.125:DAS110 = 0
## IP0.25:DAS110 = 0
## IP0.5:DAS110 = 0
## IP1:DAS110 = 0
##
## Model 1: restricted model
## Model 2: count ~ offset(log(volume)) + IP * DAS
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 44
## 2 40 4 7.8569 0.09696 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: count
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 49 5375.6
## IP 4 662.9 45 4712.6 1.998e-10 ***
## DAS 1 4124.8 44 587.8 < 2.2e-16 ***
## IP:DAS 4 100.9 40 486.9 0.0992 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: count
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 49 5375.6
## IP 4 662.9 45 4712.6 12.8076 8.431e-07 ***
## DAS 1 4124.8 44 587.8 318.7506 < 2.2e-16 ***
## IP:DAS 4 100.9 40 486.9 1.9499 0.1209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")
dev.off()
## null device
## 1
V[1,-1]
## (Intercept) IP0.125 IP0.25 IP0.5
## 0.067375821 -0.024481873 0.005491339 -0.004596027
## IP1 DAS110 IP0.125:DAS110 IP0.25:DAS110
## 0.042363354 0.120976452 0.046012475 0.033451032
## IP0.5:DAS110 IP1:DAS110
## 0.076770198 0.078610834
sum(abs(V[1,-1]))
## [1] 0.5001294
##----------------------------------------------------------------------
## Predição das médias com IC.
pred <- expand.grid(IP=levels(da$IP),
DAS=levels(da$DAS),
volume=100,
KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)
## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)
## Preditos pela Poisson.
aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
pred$pois <- cbind(pred$pois, aux)
str(pred$pois)
## 'data.frame': 10 obs. of 6 variables:
## $ IP : Factor w/ 5 levels "0.0625","0.125",..: 1 2 3 4 5 1 2 3 4 5
## $ DAS : Factor w/ 2 levels "30","110": 1 1 1 1 1 2 2 2 2 2
## $ volume: num 100 100 100 100 100 100 100 100 100 100
## $ lwr : num 160.9 286.6 128.2 172.5 66.3 ...
## $ fit : num 172 301 138 184 73 ...
## $ upr : num 184.4 317 148 196.3 80.5 ...
aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
pred$quasi <- cbind(pred$quasi, aux)
alpha <- coef(m3)[1] ## Locação.
theta <- coef(m3)[-1] ## Dispersão.
## Preditor linear.
X <- model.matrix(~IP*DAS, data=pred$cg)
pred$cg$eta <- c(X%*%theta)
## Matrix de covariância completa.
V <- vcov(m3)
## Marginal em theta.
Vm <- V[-1,-1]
## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]
max(abs(Vm-Vc))
## [1] 0.001924182
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
FUN=function(x){ sum(x^2) }))
## Para calcular a média de uma Gamma-Count pelo preditor linear.
eta2mean <- function(eta, offset, alpha, tol=1e-5){
stopifnot(length(eta)==1L)
stopifnot(length(offset)==1L)
stopifnot(length(alpha)==1L)
change <- 1
S <- 0; x <- 1; p <- 1
while (p>tol){
p <- pgamma(offset,
shape=exp(alpha)*x,
rate=exp(alpha)*exp(eta))
S <- S+p
x <- x+1
}
return(S)
}
aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
FUN=function(col){
sapply(col, FUN=eta2mean,
offset=pred$cg$volume[1], alpha=alpha)
})
pred$cg <- cbind(pred$cg, aux)
##----------------------------------------------------------------------
pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, DAS, IP, modelo)
str(pred)
## 'data.frame': 30 obs. of 8 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ IP : Factor w/ 5 levels "0.0625","0.125",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ DAS : Factor w/ 2 levels "30","110": 1 1 1 1 1 1 1 1 1 1 ...
## $ volume: num 100 100 100 100 100 100 100 100 100 100 ...
## $ lwr : num 161 135 138 287 251 ...
## $ fit : num 172 172 172 301 301 ...
## $ upr : num 184 220 215 317 361 ...
## $ eta : num NA NA 0.513 NA NA ...
source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")
segplot(IP~lwr+upr|DAS, centers=fit, data=pred,
draw=FALSE, scales=list(x="free"),
panel=panel.segplot.by, groups=pred$modelo, f=0.1,
pch=pred$modelo, layout=c(1, NA), as.table=TRUE,
key=list(type="o", divide=1,
lines=list(pch=1:nlevels(pred$modelo), lty=1, col=1),
text=list(c("Poisson", "Quasi-Poisson",
"Gamma-Count"))))
Nesse conjunto de dados, o efeito de bloco pode ser aleatório. Teria que implementar uma versão da Gamma-Count para acomodar termos de efeito aleatório.
##----------------------------------------------------------------------
## Carrega os dados.
db <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
header=TRUE, sep="\t", dec=",")
names(db)[1:2] <- c("k", "a")
db <- db[-74,c(1:3, 8, 10)]
str(db)
## 'data.frame': 74 obs. of 5 variables:
## $ k : int 0 30 60 120 180 0 30 60 120 180 ...
## $ a : num 37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
## $ bloco: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ts : int 136 159 156 171 190 140 193 200 208 237 ...
## $ nv : int 56 62 66 68 82 63 86 94 86 97 ...
xyplot(nv~k|a, data=db)
xyplot(ts~k|a, data=db)
##----------------------------------------------------------------------
## Gráfico de média-variância não é possível pois não existem repetições
## com o mesmo preditor.
##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.
db <- transform(db, A=factor(a), K=factor(k))
## Modelo Poisson para ter referência.
m0 <- glm(nv~bloco+A*K, data=db, family=poisson)
summary(m0)
##
## Call:
## glm(formula = nv ~ bloco + A * K, family = poisson, data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9681 -0.6393 -0.0195 0.4854 2.3306
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.95369 0.06892 57.363 < 2e-16 ***
## blocoII -0.02928 0.04091 -0.716 0.47414
## blocoIII -0.07265 0.04136 -1.756 0.07902 .
## blocoIV -0.12544 0.04194 -2.991 0.00278 **
## blocoV -0.10795 0.04297 -2.512 0.01199 *
## A50 0.13404 0.08765 1.529 0.12619
## A62.5 0.21656 0.08602 2.518 0.01181 *
## K30 0.27427 0.08493 3.229 0.00124 **
## K60 0.30797 0.08432 3.652 0.00026 ***
## K120 0.32883 0.08395 3.917 8.97e-05 ***
## K180 0.25540 0.08528 2.995 0.00275 **
## A50:K30 0.06322 0.11557 0.547 0.58433
## A62.5:K30 -0.10747 0.11536 -0.932 0.35152
## A50:K60 0.16561 0.11370 1.457 0.14521
## A62.5:K60 0.10735 0.11220 0.957 0.33869
## A50:K120 0.14920 0.11338 1.316 0.18818
## A62.5:K120 0.11839 0.11404 1.038 0.29922
## A50:K180 0.30370 0.11361 2.673 0.00751 **
## A62.5:K180 0.19838 0.11256 1.762 0.07800 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 322.684 on 73 degrees of freedom
## Residual deviance: 65.278 on 55 degrees of freedom
## AIC: 557.23
##
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: nv
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 73 322.68
## bloco 4 14.284 69 308.40 0.006442 **
## A 2 92.911 67 215.49 < 2.2e-16 ***
## K 4 136.060 63 79.43 < 2.2e-16 ***
## A:K 8 14.150 55 65.28 0.077926 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
##
## Call:
## glm(formula = nv ~ bloco + A * K, family = quasipoisson, data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9681 -0.6393 -0.0195 0.4854 2.3306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.95369 0.07547 52.389 < 2e-16 ***
## blocoII -0.02928 0.04479 -0.654 0.516036
## blocoIII -0.07265 0.04529 -1.604 0.114420
## blocoIV -0.12544 0.04592 -2.732 0.008457 **
## blocoV -0.10795 0.04705 -2.294 0.025606 *
## A50 0.13404 0.09597 1.397 0.168117
## A62.5 0.21656 0.09418 2.299 0.025303 *
## K30 0.27427 0.09300 2.949 0.004670 **
## K60 0.30797 0.09233 3.336 0.001530 **
## K120 0.32883 0.09192 3.577 0.000734 ***
## K180 0.25540 0.09338 2.735 0.008376 **
## A50:K30 0.06322 0.12654 0.500 0.619324
## A62.5:K30 -0.10747 0.12631 -0.851 0.398532
## A50:K60 0.16561 0.12449 1.330 0.188897
## A62.5:K60 0.10735 0.12286 0.874 0.386028
## A50:K120 0.14920 0.12414 1.202 0.234563
## A62.5:K120 0.11839 0.12487 0.948 0.347229
## A50:K180 0.30370 0.12439 2.441 0.017873 *
## A62.5:K180 0.19838 0.12325 1.610 0.113208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.198902)
##
## Null deviance: 322.684 on 73 degrees of freedom
## Residual deviance: 65.278 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nv
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 322.68
## bloco 4 14.284 69 308.40 2.9785 0.02684 *
## A 2 92.911 67 215.49 38.7485 3.157e-11 ***
## K 4 136.060 63 79.43 28.3719 8.316e-13 ***
## A:K 8 14.150 55 65.28 1.4754 0.18750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)
cbind(coef(m0), coef(m1))
## [,1] [,2]
## (Intercept) 3.95368724 3.95368724
## blocoII -0.02927854 -0.02927854
## blocoIII -0.07265048 -0.07265048
## blocoIV -0.12543798 -0.12543798
## blocoV -0.10794857 -0.10794857
## A50 0.13404356 0.13404356
## A62.5 0.21656458 0.21656458
## K30 0.27427290 0.27427290
## K60 0.30796674 0.30796674
## K120 0.32883188 0.32883188
## K180 0.25540441 0.25540441
## A50:K30 0.06322288 0.06322288
## A62.5:K30 -0.10747272 -0.10747272
## A50:K60 0.16561471 0.16561471
## A62.5:K60 0.10735066 0.10735066
## A50:K120 0.14920392 0.14920392
## A62.5:K120 0.11838578 0.11838578
## A50:K180 0.30369921 0.30369921
## A62.5:K180 0.19837927 0.19837927
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.
start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)
y <- with(db, nv)
X <- model.matrix(~bloco+A*K, data=db)
L <- with(db,
list(y=nv, offset=1,
X=model.matrix(~bloco+A*K)))
m2 <- mle2(ll, start=start, fixed=list(alpha=0),
data=L, vecpar=TRUE)
m3 <- mle2(ll, start=start,
data=L, vecpar=TRUE)
##----------------------------------------------------------------------
## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m2), logLik(m3))
## [,1] [,2] [,3] [,4]
## [1,] -259.6165 NA -259.6165 -259.3264
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m2)[-1], coef(m3)[-1])
## [,1] [,2] [,3] [,4]
## (Intercept) 3.95368724 3.95368724 3.95368723 3.95487435
## blocoII -0.02927854 -0.02927854 -0.02927855 -0.02925363
## blocoIII -0.07265048 -0.07265048 -0.07265048 -0.07259276
## blocoIV -0.12543798 -0.12543798 -0.12543798 -0.12534177
## blocoV -0.10794857 -0.10794857 -0.10794857 -0.10785674
## A50 0.13404356 0.13404356 0.13404356 0.13388357
## A62.5 0.21656458 0.21656458 0.21656458 0.21632039
## K30 0.27427290 0.27427290 0.27427290 0.27397250
## K60 0.30796674 0.30796674 0.30796674 0.30763762
## K120 0.32883188 0.32883188 0.32883188 0.32848139
## K180 0.25540441 0.25540441 0.25540441 0.25512492
## A50:K30 0.06322288 0.06322288 0.06322288 0.06321988
## A62.5:K30 -0.10747272 -0.10747272 -0.10747272 -0.10732539
## A50:K60 0.16561471 0.16561471 0.16561471 0.16553912
## A62.5:K60 0.10735066 0.10735066 0.10735066 0.10734196
## A50:K120 0.14920392 0.14920392 0.14920392 0.14914452
## A62.5:K120 0.11838578 0.11838578 0.11838578 0.11838048
## A50:K180 0.30369921 0.30369921 0.30369921 0.30351780
## A62.5:K180 0.19837927 0.19837927 0.19837927 0.19829595
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
## alpha
## 1.137415
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))
## Teste para o parâmetro de dispersão.
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [ll]: alpha+(Intercept)+blocoII+blocoIII+blocoIV+
## blocoV+A50+A62.5+K30+K60+K120+K180+A50:K30+
## A62.5:K30+A50:K60+A62.5:K60+A50:K120+A62.5:K120+
## A50:K180+A62.5:K180
## Model 2: m2, [ll]: (Intercept)+blocoII+blocoIII+blocoIV+blocoV+
## A50+A62.5+K30+K60+K120+K180+A50:K30+A62.5:K30+
## A50:K60+A62.5:K60+A50:K120+A62.5:K120+A50:K180+
## A62.5:K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 518.65
## 2 19 519.23 0.5802 1 0.4462
## Teste para interação (Wald).
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==4
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))
## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
## solve(L%*%vcov(m3)%*%t(L))%*%
## (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
## [,1]
## [1,] 15.96558
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
hypothesis.matrix=L,
vcov.=vcov(m3),
coef.=coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## A50:K30 = 0
## A62.5:K30 = 0
## A50:K60 = 0
## A62.5:K60 = 0
## A50:K120 = 0
## A62.5:K120 = 0
## A50:K180 = 0
## A62.5:K180 = 0
##
## Model 1: restricted model
## Model 2: nv ~ bloco + A * K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 63
## 2 55 8 15.966 0.04288 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nv
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 73 322.68
## bloco 4 14.284 69 308.40 0.0180 *
## A 2 92.911 67 215.49 <2e-16 ***
## K 4 136.060 63 79.43 <2e-16 ***
## A:K 8 14.150 55 65.28 0.1602
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nv
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 322.68
## bloco 4 14.284 69 308.40 2.9785 0.02684 *
## A 2 92.911 67 215.49 38.7485 3.157e-11 ***
## K 4 136.060 63 79.43 28.3719 8.316e-13 ***
## A:K 8 14.150 55 65.28 1.4754 0.18750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.
V <- vcov(m3)
corrplot.mixed(V, upper="ellipse", col="gray50")
dev.off()
## null device
## 1
V[1,-1]
## (Intercept) blocoII blocoIII blocoIV
## 2.365324e-04 5.001425e-06 1.151560e-05 1.911075e-05
## blocoV A50 A62.5 K30
## 1.837628e-05 -3.193628e-05 -4.869315e-05 -5.989025e-05
## K60 K120 K180 A50:K30
## -6.558486e-05 -6.987732e-05 -5.568224e-05 -4.608388e-07
## A62.5:K30 A50:K60 A62.5:K60 A50:K120
## 2.939089e-05 -1.498535e-05 -1.705038e-06 -1.175731e-05
## A62.5:K120 A50:K180 A62.5:K180
## -9.755547e-07 -3.610277e-05 -1.658983e-05
sum(abs(V[1,-1]))
## [1] 0.0007341682
##----------------------------------------------------------------------
## Predição das médias com IC.
pred <- expand.grid(A=levels(db$A),
K=levels(db$K),
bloco=factor("I", levels=levels(db$bloco)),
KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)
## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)
## Preditor linear que considera o efeito médio dos blocos.
X <- LSmatrix(m0, effect=c("A", "K"))
## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
alpha <- coef(m3)[1] ## Locação.
theta <- coef(m3)[-1] ## Dispersão.
## Preditor linear.
pred$cg$eta <- c(X%*%theta)
## Matrix de covariância completa.
V <- vcov(m3)
## Marginal em theta.
Vm <- V[-1,-1]
## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]
max(abs(Vm-Vc))
## [1] 2.043359e-06
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
FUN=function(x){ sum(x^2) }))
aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
FUN=function(col){
sapply(col, FUN=eta2mean,
offset=1, alpha=alpha)
})
pred$cg <- cbind(pred$cg, aux)
##----------------------------------------------------------------------
pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, A, K, modelo)
str(pred)
## 'data.frame': 45 obs. of 8 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ A : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
## $ K : Factor w/ 5 levels "0","30","60",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ bloco : Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ fit : num 48.7 48.7 48.7 64.1 64.1 ...
## $ lwr : num 43 42.5 43.3 57.5 56.9 ...
## $ upr : num 55.3 55.9 54.8 71.5 72.3 ...
## $ eta : num NA NA 3.89 NA NA ...
xyplot(nv~K|A, data=db, layout=c(NA, 1), as.table=TRUE,
col=1, cex=0.7, pch=19)+
as.layer(
segplot(K~lwr+upr|A, centers=fit, data=pred,
draw=FALSE, horizontal=FALSE, ## scales=list(x="free"),
panel=panel.segplot.by, groups=pred$modelo, f=0.15,
pch=pred$modelo, layout=c(NA, 1), as.table=TRUE,
key=list(type="o", divide=1,
lines=list(pch=1:nlevels(pred$modelo),
lty=1, col=1),
text=list(c("Poisson", "Quasi-Poisson",
"Gamma-Count"))))
)
http://www.leg.ufpr.br/~walmes/data/ninfas.txt
##----------------------------------------------------------------------
## Análise exploratória.
dc <- read.table("moscaBranca.txt", header=TRUE, sep="\t")
dc <- transform(dc,
data=as.Date(dc$data),
bloc=factor(bloc),
tot=inf+med+sup,
off=100)
dc$aval <- with(dc, c(data-min(data)))
dc$Aval <- factor(dc$aval)
names(dc)[2:3] <- c("Vari","Bloc")
dc <- droplevels(subset(dc, grepl("BRS", x=Vari)))
str(dc)
## 'data.frame': 96 obs. of 10 variables:
## $ data: Date, format: "2009-12-11" ...
## $ Vari: Factor w/ 4 levels "BRS 239","BRS 243 RR",..: 3 3 3 3 2 2 2 2 4 4 ...
## $ Bloc: Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
## $ sup : int 168 30 28 6 31 4 3 48 100 54 ...
## $ med : int 71 46 36 1 22 6 12 76 50 15 ...
## $ inf : int 39 22 7 31 28 10 27 13 34 7 ...
## $ tot : int 278 98 71 38 81 20 42 137 184 76 ...
## $ off : num 100 100 100 100 100 100 100 100 100 100 ...
## $ aval: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Aval: Factor w/ 6 levels "0","8","13","22",..: 1 1 1 1 1 1 1 1 1 1 ...
## xyplot(sup+med+inf~data, groups=Vari, outer=TRUE, data=dc)
xyplot(tot~data|Vari, data=dc)
##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.
## IMPORTANT: Foi usado um offset de 100 por problemas de
## over/underflow. O problema não é com a Poisson mas com a
## Gamma-Count. Todas as análises consideram um offset de 100, portanto,
## permanecem comparáveis e igualmente interpretáveis.
## Modelo Poisson para ter referência.
m0 <- glm(tot~offset(log(off))+Bloc+Vari*Aval,
data=dc, family=poisson)
summary(m0)
##
## Call:
## glm(formula = tot ~ offset(log(off)) + Bloc + Vari * Aval, family = poisson,
## data = dc)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.9723 -2.7083 -0.9217 1.8305 11.3713
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.616222 0.049692 12.401 < 2e-16 ***
## Bloc2 -0.520193 0.032281 -16.114 < 2e-16 ***
## Bloc3 -0.812680 0.035555 -22.857 < 2e-16 ***
## Bloc4 -0.993601 0.037919 -26.204 < 2e-16 ***
## VariBRS 243 RR -0.465529 0.076247 -6.106 1.02e-09 ***
## VariBRS 245 RR 0.083830 0.065605 1.278 0.201320
## VariBRS 246 RR -0.250994 0.071582 -3.506 0.000454 ***
## Aval8 0.289922 0.062610 4.631 3.65e-06 ***
## Aval13 0.423243 0.060915 6.948 3.70e-12 ***
## Aval22 -0.711247 0.082513 -8.620 < 2e-16 ***
## Aval31 -2.881443 0.205529 -14.020 < 2e-16 ***
## Aval38 -3.155880 0.234176 -13.477 < 2e-16 ***
## VariBRS 243 RR:Aval8 -0.204400 0.103779 -1.970 0.048889 *
## VariBRS 245 RR:Aval8 -0.243589 0.089165 -2.732 0.006297 **
## VariBRS 246 RR:Aval8 0.067329 0.093904 0.717 0.473373
## VariBRS 243 RR:Aval13 -0.536572 0.106217 -5.052 4.38e-07 ***
## VariBRS 245 RR:Aval13 -0.489295 0.089303 -5.479 4.28e-08 ***
## VariBRS 246 RR:Aval13 0.005953 0.092025 0.065 0.948419
## VariBRS 243 RR:Aval22 -0.136051 0.136796 -0.995 0.319954
## VariBRS 245 RR:Aval22 -0.620871 0.129162 -4.807 1.53e-06 ***
## VariBRS 246 RR:Aval22 -0.094652 0.127050 -0.745 0.456272
## VariBRS 243 RR:Aval31 0.137025 0.318385 0.430 0.666922
## VariBRS 245 RR:Aval31 0.334880 0.265744 1.260 0.207611
## VariBRS 246 RR:Aval31 0.364323 0.284321 1.281 0.200060
## VariBRS 243 RR:Aval38 0.160147 0.360325 0.444 0.656716
## VariBRS 245 RR:Aval38 0.555250 0.290995 1.908 0.056377 .
## VariBRS 246 RR:Aval38 0.196927 0.336557 0.585 0.558466
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 7106.3 on 95 degrees of freedom
## Residual deviance: 1283.8 on 69 degrees of freedom
## AIC: 1812.5
##
## Number of Fisher Scoring iterations: 5
anova(m0, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: tot
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 95 7106.3
## Bloc 3 948.6 92 6157.7 < 2.2e-16 ***
## Vari 3 354.2 89 5803.5 < 2.2e-16 ***
## Aval 5 4432.2 84 1371.3 < 2.2e-16 ***
## Vari:Aval 15 87.5 69 1283.8 2.897e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
##
## Call:
## glm(formula = tot ~ offset(log(off)) + Bloc + Vari * Aval, family = quasipoisson,
## data = dc)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.9723 -2.7083 -0.9217 1.8305 11.3713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.616222 0.216765 2.843 0.005877 **
## Bloc2 -0.520193 0.140816 -3.694 0.000438 ***
## Bloc3 -0.812680 0.155096 -5.240 1.66e-06 ***
## Bloc4 -0.993601 0.165406 -6.007 7.93e-08 ***
## VariBRS 243 RR -0.465529 0.332600 -1.400 0.166092
## VariBRS 245 RR 0.083830 0.286178 0.293 0.770455
## VariBRS 246 RR -0.250994 0.312251 -0.804 0.424261
## Aval8 0.289922 0.273114 1.062 0.292145
## Aval13 0.423243 0.265718 1.593 0.115770
## Aval22 -0.711247 0.359933 -1.976 0.052148 .
## Aval31 -2.881443 0.896547 -3.214 0.001992 **
## Aval38 -3.155880 1.021509 -3.089 0.002890 **
## VariBRS 243 RR:Aval8 -0.204400 0.452700 -0.452 0.653036
## VariBRS 245 RR:Aval8 -0.243589 0.388950 -0.626 0.533202
## VariBRS 246 RR:Aval8 0.067329 0.409623 0.164 0.869921
## VariBRS 243 RR:Aval13 -0.536572 0.463333 -1.158 0.250828
## VariBRS 245 RR:Aval13 -0.489295 0.389554 -1.256 0.213338
## VariBRS 246 RR:Aval13 0.005953 0.401426 0.015 0.988210
## VariBRS 243 RR:Aval22 -0.136051 0.596723 -0.228 0.820324
## VariBRS 245 RR:Aval22 -0.620871 0.563421 -1.102 0.274305
## VariBRS 246 RR:Aval22 -0.094652 0.554212 -0.171 0.864891
## VariBRS 243 RR:Aval31 0.137025 1.388842 0.099 0.921693
## VariBRS 245 RR:Aval31 0.334880 1.159213 0.289 0.773535
## VariBRS 246 RR:Aval31 0.364323 1.240248 0.294 0.769831
## VariBRS 243 RR:Aval38 0.160147 1.571791 0.102 0.919141
## VariBRS 245 RR:Aval38 0.555250 1.269361 0.437 0.663170
## VariBRS 246 RR:Aval38 0.196927 1.468109 0.134 0.893685
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 19.02829)
##
## Null deviance: 7106.3 on 95 degrees of freedom
## Residual deviance: 1283.8 on 69 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: tot
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 95 7106.3
## Bloc 3 948.6 92 6157.7 16.6171 3.15e-08 ***
## Vari 3 354.2 89 5803.5 6.2056 0.0008548 ***
## Aval 5 4432.2 84 1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15 87.5 69 1283.8 0.3066 0.9932405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: tot
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 95 7106.3
## Bloc 3 948.6 92 6157.7 16.6171 3.15e-08 ***
## Vari 3 354.2 89 5803.5 6.2056 0.0008548 ***
## Aval 5 4432.2 84 1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15 87.5 69 1283.8 0.3066 0.9932405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.
start <- c("alpha"=log(0.05), coef(m0))
parnames(ll) <- names(start)
y <- with(dc, tot)
X <- model.matrix(~Bloc+Vari*Aval, data=dc)
L <- with(dc, list(y=tot, offset=100, X=X))
m3 <- mle2(ll, start=start,
data=L, vecpar=TRUE)
## summary(m3)
##----------------------------------------------------------------------
## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
## [,1] [,2] [,3]
## [1,] -879.2287 NA -406.3823
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
## [,1] [,2] [,3]
## (Intercept) 0.61622189 0.61622189 0.58504511
## Bloc2 -0.52019337 -0.52019337 -0.57757362
## Bloc3 -0.81268000 -0.81268000 -0.91824130
## Bloc4 -0.99360148 -0.99360148 -1.12689114
## VariBRS 243 RR -0.46552935 -0.46552935 -0.52339156
## VariBRS 245 RR 0.08382994 0.08382994 0.08711104
## VariBRS 246 RR -0.25099417 -0.25099417 -0.28037541
## Aval8 0.28992172 0.28992172 0.31047177
## Aval13 0.42324335 0.42324335 0.45246233
## Aval22 -0.71124722 -0.71124722 -0.80784849
## Aval31 -2.88144313 -2.88144313 -4.65211494
## Aval38 -3.15587975 -3.15587975 -5.60370939
## VariBRS 243 RR:Aval8 -0.20439954 -0.20439954 -0.21832971
## VariBRS 245 RR:Aval8 -0.24358916 -0.24358916 -0.26078764
## VariBRS 246 RR:Aval8 0.06732943 0.06732943 0.08301183
## VariBRS 243 RR:Aval13 -0.53657204 -0.53657204 -0.58168152
## VariBRS 245 RR:Aval13 -0.48929505 -0.48929505 -0.52302729
## VariBRS 246 RR:Aval13 0.00595329 0.00595329 0.01849365
## VariBRS 243 RR:Aval22 -0.13605064 -0.13605064 -0.24862953
## VariBRS 245 RR:Aval22 -0.62087140 -0.62087140 -0.76234787
## VariBRS 246 RR:Aval22 -0.09465244 -0.09465244 -0.14619717
## VariBRS 243 RR:Aval31 0.13702528 0.13702528 -0.57602847
## VariBRS 245 RR:Aval31 0.33488040 0.33488040 1.01382755
## VariBRS 246 RR:Aval31 0.36432286 0.36432286 0.59593412
## VariBRS 243 RR:Aval38 0.16014748 0.16014748 -0.68516307
## VariBRS 245 RR:Aval38 0.55524980 0.55524980 1.80768792
## VariBRS 246 RR:Aval38 0.19692673 0.19692673 0.09671683
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
## alpha
## 0.05122604
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==4
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))
## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
## solve(L%*%vcov(m3)%*%t(L))%*%
## (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
## [,1]
## [1,] 6.354826
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
hypothesis.matrix=L,
vcov.=vcov(m3),
coef.=coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## VariBRS 243 RR:Aval8 = 0
## VariBRS 245 RR:Aval8 = 0
## VariBRS 246 RR:Aval8 = 0
## VariBRS 243 RR:Aval13 = 0
## VariBRS 245 RR:Aval13 = 0
## VariBRS 246 RR:Aval13 = 0
## VariBRS 243 RR:Aval22 = 0
## VariBRS 245 RR:Aval22 = 0
## VariBRS 246 RR:Aval22 = 0
## VariBRS 243 RR:Aval31 = 0
## VariBRS 245 RR:Aval31 = 0
## VariBRS 246 RR:Aval31 = 0
## VariBRS 243 RR:Aval38 = 0
## VariBRS 245 RR:Aval38 = 0
## VariBRS 246 RR:Aval38 = 0
##
## Model 1: restricted model
## Model 2: tot ~ offset(log(off)) + Bloc + Vari * Aval
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 84
## 2 69 15 6.3548 0.9732
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: tot
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 95 7106.3
## Bloc 3 948.6 92 6157.7 8.594e-11 ***
## Vari 3 354.2 89 5803.5 0.0003281 ***
## Aval 5 4432.2 84 1371.3 < 2.2e-16 ***
## Vari:Aval 15 87.5 69 1283.8 0.9950153
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: tot
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 95 7106.3
## Bloc 3 948.6 92 6157.7 16.6171 3.15e-08 ***
## Vari 3 354.2 89 5803.5 6.2056 0.0008548 ***
## Aval 5 4432.2 84 1371.3 46.5852 < 2.2e-16 ***
## Vari:Aval 15 87.5 69 1283.8 0.3066 0.9932405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")
dev.off()
## null device
## 1
##----------------------------------------------------------------------
## Predição das médias com IC.
pred <- expand.grid(Vari=levels(dc$Vari),
Aval=unique(dc$Aval),
Bloc=factor("1", levels=levels(dc$Bloc)),
off=100,
KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)
## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)
X <- LSmatrix(lm(tot~Bloc+Vari*Aval, dc), effect=c("Vari", "Aval"))
## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, 100*exp(aux))
## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, 100*exp(aux))
alpha <- coef(m3)[1] ## Locação.
theta <- coef(m3)[-1] ## Dispersão.
## Preditor linear.
pred$cg$eta <- c(X%*%theta)
## Matrix de covariância completa.
V <- vcov(m3)
## Marginal em theta.
Vm <- V[-1,-1]
## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]
max(abs(Vm-Vc))
## [1] 0.3355968
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
FUN=function(x){ sum(x^2) }))
aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
FUN=function(col){
sapply(col, FUN=eta2mean,
offset=100, alpha=alpha)
})
pred$cg <- cbind(pred$cg, aux)
##----------------------------------------------------------------------
pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, Aval, Vari, modelo)
str(pred)
## 'data.frame': 72 obs. of 9 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ Vari : Factor w/ 4 levels "BRS 239","BRS 243 RR",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Aval : Factor w/ 6 levels "0","8","13","22",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Bloc : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ off : num 100 100 100 100 100 100 100 100 100 100 ...
## $ fit : num 104 104 102 65 65 ...
## $ lwr : num 94.3 68.9 69.9 57.8 38.9 ...
## $ upr : num 113.6 155.5 152.4 73.1 108.5 ...
## $ eta : num NA NA -0.0706 NA NA ...
## source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")
xyplot(tot~Aval|Vari, data=dc, layout=c(NA, 1), as.table=TRUE,
col=1, cex=0.7, pch=19)+
as.layer(
segplot(Aval~lwr+upr|Vari, centers=fit, data=pred,
horizontal=FALSE,
draw=FALSE,
panel=panel.segplot.by, groups=pred$modelo, f=0.15,
pch=pred$modelo, layout=c(NA, 1), as.table=TRUE,
key=list(type="o", divide=1,
lines=list(pch=1:nlevels(pred$modelo),
lty=1, col=1),
text=list(c("Poisson", "Quasi-Poisson",
"Gamma-Count"))))
)
##----------------------------------------------------------------------
dd <- read.table("ovos.txt", header=TRUE, sep="\t")
str(dd)
## 'data.frame': 2100 obs. of 7 variables:
## $ periodo: int 1 1 1 1 1 1 1 1 1 1 ...
## $ box : int 1 1 1 1 1 1 2 2 2 2 ...
## $ luz : Factor w/ 5 levels "amarelo","azul",..: 4 4 4 4 4 4 1 1 1 1 ...
## $ gaiola : int 1 2 3 4 5 6 1 2 3 4 ...
## $ dia : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ovos : int 9 7 9 7 9 8 9 8 6 10 ...
## $ massa : int 698 591 589 401 611 531 569 546 410 631 ...
ftable(xtabs(~periodo+luz+box, data=dd))
## box 1 2 3 4 5
## periodo luz
## 1 amarelo 0 84 0 0 0
## azul 0 0 84 0 0
## branco 0 0 0 84 0
## verde 84 0 0 0 0
## vermelho 0 0 0 0 84
## 2 amarelo 84 0 0 0 0
## azul 0 0 0 84 0
## branco 0 84 0 0 0
## verde 0 0 0 0 84
## vermelho 0 0 84 0 0
## 3 amarelo 0 0 0 0 84
## azul 84 0 0 0 0
## branco 0 0 84 0 0
## verde 0 84 0 0 0
## vermelho 0 0 0 84 0
## 4 amarelo 0 0 84 0 0
## azul 0 84 0 0 0
## branco 0 0 0 0 84
## verde 0 0 0 84 0
## vermelho 84 0 0 0 0
## 5 amarelo 0 0 0 84 0
## azul 0 0 0 0 84
## branco 84 0 0 0 0
## verde 0 0 84 0 0
## vermelho 0 84 0 0 0
## Cada box tem 6 gaiolas. Cada gaiola tem 10 aves.
## Modelar a produção por ave dia usando o total de por gaiola em duas
## semanas. O offset é 10*14=140.
useOuterStrips(
xyplot(ovos~dia|luz+periodo, data=dd, jitter.x=TRUE,
groups=gaiola,
type=c("p", "smooth")))
useOuterStrips(
xyplot(massa~dia|luz+periodo, data=dd, jitter.x=TRUE,
groups=gaiola,
type=c("p", "smooth")))
## Agregar para o total quinzenal.
dd <- aggregate(ovos~periodo+box+luz+gaiola, data=dd, FUN=sum)
str(dd)
## 'data.frame': 150 obs. of 5 variables:
## $ periodo: int 2 1 4 5 3 3 4 1 2 5 ...
## $ box : int 1 2 3 4 5 1 2 3 4 5 ...
## $ luz : Factor w/ 5 levels "amarelo","azul",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ gaiola : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ovos : int 111 115 99 118 117 107 91 113 120 115 ...
xyplot(ovos~periodo, groups=luz, data=dd, jitter.x=TRUE,
## groups=gaiola,
type=c("p", "smooth"))
##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.
## IMPORTANT: Foi usado um offset de 10*14=140.
dd <- transform(dd,
off=10*14, ## 10 aves por gaiola, soma de 14 dias.
Per=factor(periodo),
Box=factor(box))
names(dd)[3] <- "Luz"
## WARNING: Esta sendo negligenciada a variância de gaiolas. Isso requer
## um modelo de efeitos aleatórios que ainda não dispomos.
## Modelo Poisson para ter referência.
m0 <- glm(ovos~offset(log(off))+Per+Box+Luz,
data=dd, family=poisson)
summary(m0)
##
## Call:
## glm(formula = ovos ~ offset(log(off)) + Per + Box + Luz, family = poisson,
## data = dd)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.98939 -0.70195 0.05559 0.73734 2.84660
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.293502 0.028913 -10.151 < 2e-16 ***
## Per2 -0.020631 0.024848 -0.830 0.406375
## Per3 -0.066985 0.025143 -2.664 0.007717 **
## Per4 -0.145362 0.025666 -5.664 1.48e-08 ***
## Per5 -0.094955 0.025330 -3.749 0.000178 ***
## Box2 0.016591 0.025919 0.640 0.522110
## Box3 0.017281 0.025923 0.667 0.505028
## Box4 0.071877 0.025581 2.810 0.004957 **
## Box5 0.086243 0.025484 3.384 0.000714 ***
## Luzazul -0.007520 0.025659 -0.293 0.769463
## Luzbranco 0.005290 0.025575 0.207 0.836122
## Luzverde 0.026120 0.025441 1.027 0.304566
## Luzvermelho 0.003538 0.025579 0.138 0.889987
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 214.21 on 149 degrees of freedom
## Residual deviance: 153.17 on 137 degrees of freedom
## AIC: 1148.3
##
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: ovos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 149 214.21
## Per 4 41.191 145 173.02 2.454e-08 ***
## Box 4 17.905 141 155.12 0.001288 **
## Luz 4 1.942 137 153.17 0.746451
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Modelo quasi Poisson para usar os valores iniciais.
## m1 <- glm(count/volume~IP*DAS, data=da, family=quasipoisson)
m1 <- update(m0, family=quasipoisson)
summary(m1)
##
## Call:
## glm(formula = ovos ~ offset(log(off)) + Per + Box + Luz, family = quasipoisson,
## data = dd)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.98939 -0.70195 0.05559 0.73734 2.84660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.293502 0.030410 -9.652 < 2e-16 ***
## Per2 -0.020631 0.026135 -0.789 0.431234
## Per3 -0.066985 0.026445 -2.533 0.012435 *
## Per4 -0.145362 0.026995 -5.385 3.07e-07 ***
## Per5 -0.094955 0.026642 -3.564 0.000503 ***
## Box2 0.016591 0.027261 0.609 0.543808
## Box3 0.017281 0.027266 0.634 0.527279
## Box4 0.071877 0.026905 2.671 0.008467 **
## Box5 0.086243 0.026804 3.218 0.001614 **
## Luzazul -0.007520 0.026988 -0.279 0.780934
## Luzbranco 0.005290 0.026899 0.197 0.844375
## Luzverde 0.026120 0.026758 0.976 0.330712
## Luzvermelho 0.003538 0.026904 0.132 0.895564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.106239)
##
## Null deviance: 214.21 on 149 degrees of freedom
## Residual deviance: 153.17 on 137 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ovos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 149 214.21
## Per 4 41.191 145 173.02 9.3088 1.101e-06 ***
## Box 4 17.905 141 155.12 4.0464 0.003924 **
## Luz 4 1.942 137 153.17 0.4388 0.780359
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.
start <- c("alpha"=log(1), coef(m0))
parnames(ll) <- names(start)
y <- with(dd, ovos)
X <- model.matrix(~Per+Box+Luz, data=dd)
L <- list(y=y, offset=140, X=X)
m3 <- mle2(ll, start=start,
data=L, vecpar=TRUE)
## summary(m3)
##----------------------------------------------------------------------
## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
## [,1] [,2] [,3]
## [1,] -561.1435 NA -561.1298
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
## [,1] [,2] [,3]
## (Intercept) -0.293502158 -0.293502158 -0.293628429
## Per2 -0.020631035 -0.020631035 -0.020628965
## Per3 -0.066985381 -0.066985381 -0.066953143
## Per4 -0.145362253 -0.145362253 -0.145392814
## Per5 -0.094955166 -0.094955166 -0.094939106
## Box2 0.016590669 0.016590669 0.016613475
## Box3 0.017280556 0.017280556 0.017309822
## Box4 0.071877118 0.071877118 0.071885239
## Box5 0.086243384 0.086243384 0.086288936
## Luzazul -0.007520114 -0.007520114 -0.007514504
## Luzbranco 0.005290366 0.005290366 0.005306603
## Luzverde 0.026119937 0.026119937 0.026153428
## Luzvermelho 0.003538117 0.003538117 0.003556394
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
## alpha
## 0.9809765
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a==3
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))
## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
## solve(L%*%vcov(m3)%*%t(L))%*%
## (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
## [,1]
## [1,] 1.914755
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
hypothesis.matrix=L,
vcov.=vcov(m3),
coef.=coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## Luzazul = 0
## Luzbranco = 0
## Luzverde = 0
## Luzvermelho = 0
##
## Model 1: restricted model
## Model 2: ovos ~ offset(log(off)) + Per + Box + Luz
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 141
## 2 137 4 1.9148 0.7514
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ovos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 149 214.21
## Per 4 41.191 145 173.02 1.611e-07 ***
## Box 4 17.905 141 155.12 0.00278 **
## Luz 4 1.942 137 153.17 0.78064
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ovos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 149 214.21
## Per 4 41.191 145 173.02 9.3088 1.101e-06 ***
## Box 4 17.905 141 155.12 4.0464 0.003924 **
## Luz 4 1.942 137 153.17 0.4388 0.780359
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")
dev.off()
## null device
## 1
##----------------------------------------------------------------------
## Predição das médias com IC.
pred <- expand.grid(Per=factor(levels(dd$Per)[1],
levels=levels(dd$Per)),
Box=factor(levels(dd$Box)[1],
levels=levels(dd$Box)),
Luz=levels(dd$Luz),
off=10*14,
KEEP.OUT.ATTRS=FALSE)
pred <- list(pois=pred, quasi=pred, cg=pred)
## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)
X <- LSmatrix(lm(ovos~Per+Box+Luz, dd), effect=c("Luz"))
## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, 140*exp(aux))
## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, 140*exp(aux))
alpha <- coef(m3)[1] ## Locação.
theta <- coef(m3)[-1] ## Dispersão.
## Preditor linear.
pred$cg$eta <- c(X%*%theta)
## Matrix de covariância completa.
V <- vcov(m3)
## Marginal em theta.
Vm <- V[-1,-1]
## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]
max(abs(Vm-Vc))
## [1] 3.270846e-07
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
FUN=function(x){ sum(x^2) }))
aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
FUN=function(col){
sapply(col, FUN=eta2mean,
offset=140, alpha=alpha)
})
pred$cg <- cbind(pred$cg, aux)
##----------------------------------------------------------------------
pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, Luz, modelo)
str(pred)
## 'data.frame': 15 obs. of 9 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ Per : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Box : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Luz : Factor w/ 5 levels "amarelo","azul",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ off : num 140 140 140 140 140 140 140 140 140 140 ...
## $ fit : num 102 102 102 101 101 ...
## $ lwr : num 98 97.9 98 97.3 97.1 ...
## $ upr : num 105 105 105 104 105 ...
## $ eta : num NA NA -0.321 NA NA ...
## source("http://git.leg.ufpr.br/leg/legTools/raw/devel/R/panel.segplot.by.R")
segplot(Luz~lwr+upr, centers=fit, data=pred,
horizontal=FALSE,
draw=FALSE,
panel=panel.segplot.by, groups=pred$modelo, f=0.15,
pch=pred$modelo, layout=c(NA, 1), as.table=TRUE,
key=list(type="o", divide=1,
lines=list(pch=1:nlevels(pred$modelo),
lty=1, col=1),
text=list(c("Poisson", "Quasi-Poisson",
"Gamma-Count"))))
http://www.leg.ufpr.br/~walmes/data/mosca_algodao_prod.txt
##----------------------------------------------------------------------
da <- read.table(
"http://www.leg.ufpr.br/~walmes/data/mosca_algodao_prod.txt",
header=TRUE, sep="\t")
## da <- aggregate(ncapu~vaso+dexp, data=da, FUN=sum)
str(da)
## 'data.frame': 60 obs. of 8 variables:
## $ dexp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ vaso : int 1 1 2 2 3 3 4 4 5 5 ...
## $ planta : int 1 2 1 2 1 2 1 2 1 2 ...
## $ nestrrep: int 4 4 3 5 5 5 5 5 4 5 ...
## $ ncapu : int 4 4 2 5 5 5 5 5 4 5 ...
## $ alt : num 70.5 67.5 72 70 64 64.5 67.5 61 70 70 ...
## $ nnos : int 13 14 14 15 15 11 14 11 14 15 ...
## $ pesocap : num 25.2 NA 28.5 NA 31.8 ...
xyplot(ncapu~dexp, data=da, jitter.x=TRUE,
type=c("p", "smooth"))
##----------------------------------------------------------------------
## Ajuste do Poisson e Quasi-Poisson.
## da <- transform(da, Dexp=factor(dexp))
da <- transform(da, dexp=dexp-mean(range(dexp)))
## WARNING: Esta sendo negligenciada a variância entre vasos. Isso
## requer um modelo de efeitos aleatórios que ainda não dispomos.
## Modelo Poisson para ter referência.
m0 <- glm(ncapu~dexp+I(dexp^2), data=da, family=poisson)
summary(m0)
##
## Call:
## glm(formula = ncapu ~ dexp + I(dexp^2), family = poisson, data = da)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.58913 -0.33198 0.06437 0.27783 1.25187
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.23887 0.10535 11.759 <2e-16 ***
## dexp -0.02573 0.03785 -0.680 0.497
## I(dexp^2) 0.02870 0.02638 1.088 0.277
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 20.800 on 59 degrees of freedom
## Residual deviance: 19.133 on 57 degrees of freedom
## AIC: 214.88
##
## Number of Fisher Scoring iterations: 4
anova(m0, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: ncapu
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 59 20.800
## dexp 1 0.49164 58 20.308 0.4832
## I(dexp^2) 1 1.17575 57 19.133 0.2782
## Modelo quasi Poisson para usar os valores iniciais.
m1 <- update(m0, family=quasipoisson)
summary(m1)
##
## Call:
## glm(formula = ncapu ~ dexp + I(dexp^2), family = quasipoisson,
## data = da)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.58913 -0.33198 0.06437 0.27783 1.25187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.23887 0.05991 20.677 <2e-16 ***
## dexp -0.02573 0.02152 -1.195 0.2370
## I(dexp^2) 0.02870 0.01500 1.913 0.0607 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.3234131)
##
## Null deviance: 20.800 on 59 degrees of freedom
## Residual deviance: 19.133 on 57 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ncapu
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 59 20.800
## dexp 1 0.49164 58 20.308 1.5202 0.22265
## I(dexp^2) 1 1.17575 57 19.133 3.6355 0.06161 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2)); plot(m1); layout(1)
##----------------------------------------------------------------------
## Ajuste do Gamma-Count.
start <- c("alpha"=log(2), coef(m0))
parnames(ll) <- names(start)
y <- with(da, ncapu)
X <- model.matrix(m0)
L <- list(y=y, offset=1, X=X)
m3 <- mle2(ll, start=start,
data=L, vecpar=TRUE)
summary(m3)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = ll, start = start, data = L, vecpar = TRUE)
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## alpha 1.337744 0.203356 6.5783 4.757e-11 ***
## (Intercept) 1.340756 0.053763 24.9381 < 2.2e-16 ***
## dexp -0.023555 0.019175 -1.2284 0.21929
## I(dexp^2) 0.026019 0.013354 1.9484 0.05136 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 178.8027
##----------------------------------------------------------------------
## Log-verossimilhaça.
cbind(logLik(m0), logLik(m1), logLik(m3))
## [,1] [,2] [,3]
## [1,] -104.4391 NA -89.40136
## Estimativas dos parâmetros.
cbind(coef(m0), coef(m1), coef(m3)[-1])
## [,1] [,2] [,3]
## (Intercept) 1.23887158 1.23887158 1.34075603
## dexp -0.02572514 -0.02572514 -0.02355527
## I(dexp^2) 0.02870051 0.02870051 0.02601916
## Estimativa do parâmetro de dispersão.
exp(coef(m3)[1])
## alpha
## 3.810439
## Perfil de log-verossmilhança.
plot(profile(m3, which=1))
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a!=0
L <- t(replicate(sum(ai), rbind(coef(m3)*0), simplify="matrix"))
L[,ai] <- diag(sum(ai))
## Teste de Wald explicito.
## t(L%*%coef(m3))%*%
## solve(L%*%vcov(m3)%*%t(L))%*%
## (L%*%coef(m3))
crossprod(L%*%coef(m3), solve(L%*%vcov(m3)%*%t(L), L%*%coef(m3)))
## [,1]
## [1,] 5.483212
## Teste de Wald para interação (poderia ser LRT, claro).
linearHypothesis(model=m0, ## É necessário um objeto glm.
hypothesis.matrix=L,
vcov.=vcov(m3),
coef.=coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## dexp = 0
## I(dexp^2) = 0
##
## Model 1: restricted model
## Model 2: ncapu ~ dexp + I(dexp^2)
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 59
## 2 57 2 5.4832 0.06447 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Teste pelo modelo quasi Poisson.
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ncapu
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 59 20.800
## dexp 1 0.49164 58 20.308 0.21759
## I(dexp^2) 1 1.17575 57 19.133 0.05656 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ncapu
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 59 20.800
## dexp 1 0.49164 58 20.308 1.5202 0.22265
## I(dexp^2) 1 1.17575 57 19.133 3.6355 0.06161 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##----------------------------------------------------------------------
## Ortogonalidade entre dispersão e locação.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, upper="ellipse", col="gray50")
dev.off()
## null device
## 1
##----------------------------------------------------------------------
## Predição das médias com IC.
pred <- data.frame(dexp=with(da, seq(min(dexp), max(dexp), l=30)))
pred <- list(pois=pred, quasi=pred, cg=pred)
## Quantil normal.
qn <- qnorm(0.975)*c(lwr=-1, fit=0, upr=1)
X <- model.matrix(formula(m0)[-2], data=pred$cg)
## Preditos pela Poisson.
## aux <- predict(m0, newdata=pred$pois, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
## aux <- predict(m1, newdata=pred$quasi, se.fit=TRUE)
## aux <- exp(aux$fit+outer(aux$se.fit, qn, FUN="*"))
## pred$quasi <- cbind(pred$quasi, aux)
aux <- confint(glht(m1, linfct=X), calpha=univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
alpha <- coef(m3)[1] ## Locação.
theta <- coef(m3)[-1] ## Dispersão.
## Preditor linear.
pred$cg$eta <- c(X%*%theta)
## Matrix de covariância completa.
V <- vcov(m3)
## Marginal em theta.
Vm <- V[-1,-1]
## Condicional.
Vc <- V[-1,-1]-V[-1,1]%*%solve(V[1,1])%*%V[1,-1]
max(abs(Vm-Vc))
## [1] 5.565329e-05
## Preditos pela Gamma-count.
U <- chol(Vm)
aux <- sqrt(apply(X%*%t(U), MARGIN=1,
FUN=function(x){ sum(x^2) }))
aux <- pred$cg$eta+outer(aux, qn, FUN="*")
aux <- apply(aux, MARGIN=2,
FUN=function(col){
sapply(col, FUN=eta2mean,
offset=1, alpha=alpha)
})
pred$cg <- cbind(pred$cg, aux)
##----------------------------------------------------------------------
pred <- ldply(pred, .id="modelo")
pred <- arrange(pred, dexp)
str(pred)
## 'data.frame': 90 obs. of 6 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ dexp : num -2.5 -2.5 -2.5 -2.33 -2.33 ...
## $ fit : num 4.4 4.4 4.4 4.28 4.28 ...
## $ lwr : num 3.36 3.77 3.79 3.37 3.73 ...
## $ upr : num 5.78 5.14 5.11 5.44 4.91 ...
## $ eta : num NA NA 1.56 NA NA ...
source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/prepanel.cbH.R")
source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/panel.cbH.R")
xyplot(ncapu~dexp, data=da, jitter.x=TRUE, pch=19, col=1)+
as.layer(
under=TRUE,
xyplot(fit~dexp, data=pred, groups=modelo, type="l",
ly=pred$lwr, uy=pred$upr, cty="bands",
## fill="gray90",
alpha=0.5,
prepanel=prepanel.cbH,
panel.groups=panel.cbH,
panel=panel.superpose)
)