diff --git a/docs/01-tcc.pdf b/docs/01-tcc.pdf index fce80c3eff460ca42ceee91e5a68560e09c4fe20..bf5ce5f1db9bdd05c93e2bc5ff484c6f66b5ac1d 100644 Binary files a/docs/01-tcc.pdf and b/docs/01-tcc.pdf differ diff --git a/docs/cap03_materiais-e-metodos.Rnw b/docs/cap03_materiais-e-metodos.Rnw index 31dbceca95c7555a7c2d79f9b15ca0724807e159..f0e242bd89afe52eb35cce70d7fd1e55ec205881 100644 --- a/docs/cap03_materiais-e-metodos.Rnw +++ b/docs/cap03_materiais-e-metodos.Rnw @@ -2,9 +2,494 @@ % CAPÍTULO 3 - MATERIAIS E MÉTODOS % ------------------------------------------------------------------------ +Essa seção é destinada à apresentação dos conjuntos de dados analisados +no trabalho, descrição dos recursos computacionais e métodos utilizados +na análise. Na seção \ref{cap03:materiais-dados} os conjuntos de dados +serão apresentados, ao todo são seis conjuntos de dados com diferentes +características. Os recursos computacionais utilizados são descritos na +seção \ref{cap03:materiais-recursos}. E na última seção +\ref{cap03:metodos} deste capítulo são apresentados os métodos para +ajuste e comparação dos modelos alternativos propostos. + \section{Materias} \label{cap03:materiais} -\lipsum[1-3] + +\subsection{Conjuntos de dados} +\label{cap03:materiais-dados} + +A seguir são apresentados os seis conjuntos de dados analisados com os +modelos alternativos propostos. Os dados em estudo são, quase em sua +totalidade, resultantes de experimentos agronômicos com delineamentos +balanceados, o que é uma característica desejável, haja vista que o +principal interesse é a avaliação do desempenho do modelo COM-Poisson +quando empregado à análise desses dados. + +A apresentação dos conjuntos segue a ordem de 1) descrição do +experimento ou estudo em destaque, 2) definição das variáveis e suas +unidades de medida e 3) descrição de suas características potencialmente +contempladas por modelos alternativos ao Poisson. 
+ +%% {\normalsize \textsc{Capulhos de Algodão em Função de Desfolha}} +\datatitle{Capulhos de Algodão Sob Efeito de Desfolha Artificial} + +<<data-cottonBolls, include=FALSE>>= + +data(cottonBolls, package = "tccPackage") + +niveis.des <- paste0(paste(unique(cottonBolls$des)*100, + collapse = ", "), "\\%") +niveis.est <- paste(unique(cottonBolls$est), collapse = ", ") + +@ + +Experimento conduzido sob delineamento inteiramente casualizado com cinco +repetições em casa de vegetação com plantas de algodão \emph{Gossypium + hirsutum} submetidas a diferentes níveis de desfolha artificial de +remoção foliar (\Sexpr{niveis.des}), em combinação com o estágio +fenológico no qual a desfolha foi aplicada (\Sexpr{niveis.est}). A +unidade experimental foi um vaso com duas plantas onde avaliou-se o +número de capulhos produzidos ao final do ciclo da cultura +\apud{Zeviani2014}{Silva2012}. O experimento contou com +\Sexpr{nrow(cottonBolls)} observações das quais temos as informações das +variáveis. + +Esse conjunto de dados já fora publicado sob a motivação da +característica de subdispersão, na ocasião o modelo proposto na análise +foi o \textit{Gamma-Count}. Na figura \ref{fig:descr-cottonBolls}, apresenta-se os +dados do experimento, à esquerda temos a disposição das cinco +observações em cada tratamento (combinação de nível de desfolha e +estágio fenológico do algodão) e à direita um outro gráfico descritivo +cruzando médias e variâncias amostrais calculadas em cada tratamento, a +linha pontilhada neste caso representa a característica de +equidispersão. Note que em todos os tratamentos obteve-se a variância +menor que a média, apontando evidência de subdispersão. + +<<descr-cottonBolls, fig.height=4.5, fig.width=8, fig.cap="(Esquerda) Número de capulhos produzidos para cada nível de desfolha e estágio fenológico. 
(Direita) Variância versus média amostral de cada uma das cinco repetições em cada combinação de nível de desfolha e estágio fenológico">>= + +library(tccPackage) +xy1 <- xyplot(ncap ~ des | est, + data = cottonBolls, + layout = c(NA, 3), + as.table = TRUE, + type = c("p", "g", "smooth"), + xlab = "Níveis de desfolha artificial", + ylab = "Número de capulhos produzidos", + xlim = extendrange(c(0:1), f = 0.15), + spread = 0.08, + panel = panel.beeswarm, + par.settings = ps.sub) + +## Sample mean and variance of ncap for each est x des combination +mv <- aggregate(ncap ~ est + des, data = cottonBolls, + FUN = function(x) c(mean = mean(x), var = var(x))) +xlim <- ylim <- extendrange(c(mv$ncap), f = 0.05) + +## Variance vs. mean on common axis limits; the dashed identity line marks equidispersion +xy2 <- xyplot(ncap[, "var"] ~ ncap[, "mean"], + data = mv, + type = c("p", "r", "g"), + xlim = xlim, + ylim = ylim, + ylab = expression("Variância Amostral"~(s^2)), + xlab = expression("Média amostral"~(bar(y))), + par.settings = ps.sub, + panel = function(x, y, ...) { + panel.xyplot(x, y, ...) + panel.abline(a = 0, b = 1, lty = 2) + }) + +print(xy1, split = c(1, 1, 2, 1), more = TRUE) +print(xy2, split = c(2, 1, 2, 1), more = FALSE) + +fonte.xy("Fonte: Traduzido de Zeviani et al. (Figura 2)") + +@ + +\datatitle{Avaliação da Exposição à Mosca-branca na produção da cultura + de algodão} + +<<data-cottonBolls2, include=FALSE>>= + +data(cottonBolls2, package = "tccPackage") + +## niveis.des <- paste0(paste(unique(cottonBolls$des)*100, +## collapse = ", "), "\\%") + +@ + +Experimento conduzido na Universidade Federal da Grande Dourados (UFGD) +em 2007, cujo objetivo foi avaliar os impactos da exposição de plantas à +alta infestação de Mosca-Branca \emph{Bemisia tabaci} em componentes de +produção do algodão \footnote{Experimento ainda não publicado.}. 
No +experimento, plantas de algodão foram expostas à alta infestação da +praga por períodos diferentes e ao final avaliou-se o número de capulhos +produzidos, o número de estruturas reprodutivas, o número de nós, a +altura da planta e o peso dos capulhos por vaso. A condução do estudo +deu-se via delineamento inteiramente casualizado com cinco vasos, +contendo duas plantas, para cada período de exposição. Na figura +\ref{fig:descr-cottonBolls2} apresentamos a disposição de cada uma das +variáveis aleatórias de contagem (número de estruturas reprodutivas, +número de capulhos produzidos e número de nós da planta) para os +diferentes períodos em que as plantas estavam sob alta infestação de +Mosca-branca. Notamos que para todas as contagens parece que há um +comportamento subdisperso. + +A indicação de subdispersão também se observa na tabela +\ref{tab:mv-cottonBolls2}, em que temos as médias e variâncias amostrais +calculadas com as observações nos seis períodos de exposição à +infestação de Mosca-branca. Note que nesse experimento também há +indícios de subdispersão para todas as contagens realizadas no +experimento; ainda, a manifestação de subdispersão na contagem do número +de nós da planta é mais expressiva. 
+ +<<descr-cottonBolls2, fig.height=4, fig.width=7.2, fig.cap="Disposição das variáveis de contagem nº de estruturas reprodutivas, nº de capulhos produzidos e nº de nós da planta observadas sob diferentes dias de exposição à infestação de Mosca-branca">>= + +vars <- c("dexp", "vaso", "planta", "nerep", "ncapu", "nnos") +cottonBolls2 <- cottonBolls2[, vars] + +da <- reshape2::melt(cottonBolls2, id.vars = c("dexp", "vaso", "planta"), + variable.name = "va", value.name = "count") +## da <- aggregate(count ~ vaso + dexp + va, data = da, FUN = sum) + +xyplot(count ~ dexp | va, groups = planta, data = da, + type = c("p", "g", "smooth"), + layout = c(NA, 1), + ylab = "Contagens", + xlab = "Dias de exposição a alta infestação de Mosca-branca", + scales = list( + y = list(relation = "free", rot = 0)), + auto.key = list( + columns = 2, title = "Planta", cex.title = 1, + lines = TRUE), + strip = strip.custom( + factor.levels = c("Estruturas reprodutivas ", + "Capulhos produzidos", + "Nós da planta") + ), + spread = 0.15, + panel = panel.beeswarm, + par.settings = ps.sub) + +fonte.xy("Fonte: Elaborado pelo autor.") + +@ + +<<mv-cottonBolls2, include=FALSE>>= + +##------------------------------------------- +## Sample mean and variance of each count variable (va) per exposure period (dexp) +library(plyr) +mvr <- ddply(da, ~dexp + va, + summarise, y.mean = mean(count), y.var = var(count)) +mvr <- mvr[order(mvr$va, mvr$dexp), ] + +## Put the three variables side by side for the table (hard-coded: rows 1:6, 7:12, 13:18) +mv <- mvr[, -(1:2)] +mv <- cbind(mvr[1:6, 1], mv[1:6, ], mv[7:12, ], mv[13:18, ]) + +## ## The table below is typeset by hand in LaTeX instead of from R (hopefully +## ## nothing was mistyped, since this step is not reproducible) +## xtable(mv) + +@ + +\begin{table}[ht] +\centering +\caption{Médias e variâncias amostrais das contagens avaliadas no + experimento de capulhos de algodão sob efeito de Mosca-Branca} +\label{tab:mv-cottonBolls2} +\begin{tabular}{>{\centering\arraybackslash} p{2cm}*{6}{c}} + \toprule + \multirow{2}{\linewidth}{Dias de 
Exposição} & \multicolumn{2}{c}{N. Estruturas} & \multicolumn{2}{c}{N. Capulhos} & \multicolumn{2}{c}{N. Nós} \\ + \cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} + & média & variância & média & variância & média & variância \\ + \midrule + 0 & 4,50 & 0,50 & 4,40 & 0,93 & 13,60 & 2,27 \\ + 1 & 4,20 & 1,29 & 3,90 & 1,43 & 16,30 & 0,90 \\ + 2 & 3,90 & 1,21 & 3,40 & 1,60 & 16,10 & 4,54 \\ + 3 & 3,50 & 1,17 & 3,40 & 1,16 & 15,40 & 3,38 \\ + 4 & 3,80 & 1,07 & 3,70 & 1,34 & 15,80 & 2,62 \\ + 5 & 3,80 & 1,07 & 3,80 & 1,07 & 15,70 & 2,68 \\ + \bottomrule +\end{tabular} +\begin{tablenotes} + \small +\item Fonte: Elaborado pelo autor. +\end{tablenotes} +\end{table} + +\datatitle{Avaliação de Umidade do Solo e Doses de Potássio na Cultura + da Soja} + +<<data-soyaBeans, include=FALSE>>= + +data(soyaBeans, package = "tccPackage") +soyaBeans <- soyaBeans[-74, ] ## drop row 74, identified as an outlier +soyaBeans <- soyaBeans[, c("K", "umid", "bloc", "ngra", "nvag")] + +@ + +Experimento fatorial 5 $\times$ 3 que estudou níveis de adubação +potássica e níveis de umidade do solo nos componentes de produção da +soja. O experimento foi instalado em casa de vegetação no delineamento +de blocos casualizados completos e a unidade experimental foi um vaso +com duas plantas de soja. No experimento foram medidas várias respostas, +sendo que o número de vagens por vaso e o número de grãos por vaso foram +as variáveis de contagem (\textbf{citar o artigo do experimento}). 
+ +<<descr-soyaBeans, fig.height=4, fig.width=7.2, fig.cap="Disposição das variáveis de contagem nº de grãos e nº de vagens viáveis por parcela observadas no experimento com a cultura de soja">>= + +key <- list( + title = "Variável de contagem", + cex.title = 1, + type = "b", divide = 1, + ## points = list(pch = 1, col = cols), + lines = list(pch = 1, lty = 1, col = trellis.par.get("superpose.symbol")$col[1:2]), + text = list(c("Nº de grãos por parcela", "Nº de vagens viáveis"))) + +xyplot(ngra + nvag ~ K | umid, + data = soyaBeans, + xlab = "Nível de adubação potássica", + ylab = "Contagem", + type = c("p", "g", "smooth"), + key = key, + layout = c(NA, 1), + strip = strip.custom( + strip.names = TRUE, var.name = "Umidade s"), + par.settings = ps.sub) + +fonte.xy("Fonte: Elaborado pelo autor.") + +@ + +Na figura \ref{fig:descr-soyaBeans} apresentamos a dispersão das +contagens nas combinações das covariáveis umidade do solo e adubação +potássica. Perceba que as duas variáveis de contagem avaliadas no +experimento apresentam níveis de dispersão distintos, essa +característica fica explícita na figura \ref{fig:mv-soyaBeans}, em que +apresentamos as dispersões entre médias e variâncias amostrais para cada +uma das variáveis de contagem. Para o número de grãos por parcela, com +contagens mais elevadas, as variâncias amostrais são, quase em sua +totalidade, superiores às médias, caracterizando uma evidência de +superdispersão. Já para o número de vagens por parcela temos médias e +variâncias, em média, próximas, o que é indício de que a suposição de +equidispersão é razoável. 
+ +<<mv-soyaBeans, fig.height=3.5, fig.width=7, fig.cap="Médias e variâncias amostrais das contagens avaliadas no experimento com cultura de soja sob efeito de umidade e adubação potássica">>= + +##------------------------------------------- +## Reshape to long format: one row per (K, umid, bloc) and count variable (va) +da <- reshape2::melt( + soyaBeans, id.vars = c("K", "umid", "bloc"), + variable.name = "va", value.name = "count") + +##------------------------------------------- +## Sample mean and variance per K x umid x count variable +library(plyr) +mvr <- ddply(da, ~K + umid + va, + summarise, y.mean = mean(count), y.var = var(count)) +mvr <- mvr[order(mvr$va, mvr$umid, mvr$K), ] + +##------------------------------------------- +## Could not get per-panel axis limits to work in one call, so each variable is plotted separately below +## xyplot(y.var ~ y.mean | va, data = mvr, +## scales = list(relation = "free"), +## panel = function(x, y, subscripts) { +## xlim <- ylim <- extendrange(c(x, y), f = 0.05) +## panel.xyplot(x, y, xlim = xlim, ylim = ylim) +## panel.abline(a = 0, b = 1, lty = 2) +## }) + +cols <- trellis.par.get("superpose.symbol")$col[1:2] +xy1 <- with(subset(mvr, va == "ngra"), { + xlim <- ylim <- extendrange(c(y.var, y.mean), f = 0.05) + xyplot(y.var ~ y.mean | va, + type = c("p", "g", "r"), + col = cols[1], + xlim = xlim, ylim = ylim, + ylab = "Variância Amostral", + xlab = "Média Amostral", + strip = strip.custom(factor.levels = "Nº de grãos"), + panel = function(x, y, ...) { + panel.xyplot(x, y, ...) 
+ panel.abline(a = 0, b = 1, lty = 2) + }, par.settings = ps.sub) +}) + +xy2 <- with(subset(mvr, va == "nvag"), { + xlim <- ylim <- extendrange(c(y.var, y.mean), f = 0.05) + xyplot(y.var ~ y.mean | va, + type = c("p", "g", "r"), + col = cols[2], + xlim = xlim, ylim = ylim, + ylab = "Variância Amostral", + xlab = "Média Amostral", + strip = strip.custom(factor.levels = "Nº de vagens"), + panel = function(x, y, ...) { + panel.xyplot(x, y, ...) + panel.abline(a = 0, b = 1, lty = 2) + }, par.settings = ps.sub) +}) + +print(xy1, split = c(1, 1, 2, 1), more = TRUE) +print(xy2, split = c(2, 1, 2, 1), more = FALSE) + +fonte.xy("Fonte: Elaborado pelo autor.") + +@ + +\datatitle{Ocorrência de Ninfas de Mosca-Branca em Lavoura de Soja} + +<<data-whiteFly, include=FALSE>>= + +data(whiteFly, package = "tccPackage") + +## Keep only the cultivars whose identifier contains "BRS" +whiteFly <- droplevels(subset(whiteFly, grepl("BRS", x = cult))) + +@ + +Nesse experimento também envolvendo a cultura de soja e a praga +Mosca-branca, foram avaliadas plantas de diferentes cultivares de soja +\texttt{BRS} - Embrapa contabilizando o número de ninfas de mosca-branca +nos folíolos dos terços superior, médio e inferior das plantas. O +experimento foi conduzido em casa de vegetação sob o delineamento de +blocos casualizados e as avaliações ocorreram em 6 datas dentre os 38 +dias do estudo \textbf{citar a dissertação do Renato}. + +Na figura \ref{fig:descr-ninfas}, à esquerda apresentamos as contagens +da praga para cada cultivar em cada uma das datas de avaliação. Note que +as contagens são muito altas e deveras dispersas, principalmente nas +quatro primeiras avaliações, à direita temos uma descrição do nível de +dispersão da variável de contagem. Perceba que esse é um conjunto de +dados extremamente superdisperso, os pontos, que representam as médias e +variâncias em cada combinação de dias após a primeira avaliação e +cultivares de soja, estão todos acima da reta identidade (equidispersão) +com variâncias em torno de 1.000 vezes maiores que as respectivas +médias. 
+ +<<descr-ninfas, fig.height=4, fig.width=8, fig.cap="(Esquerda) Número total de ninfas de mosca-branca para cada cultivar ao longo dos dias de avaliação. (Direita) Variância versus média amostral do número total de ninfas em cada combinação de data de avaliação e cultivar">>= + +xy1 <- xyplot(ntot ~ dias | cult, + data = whiteFly, + layout = c(NA, 2), + as.table = TRUE, + type = c("p", "g", "smooth"), + xlab = "Número de dias após o início do experimento", + ylab = "Número total de moscas-brancas", + spread = 0.08, + panel = panel.beeswarm, + par.settings = ps.sub) + +## Sample mean and variance of ntot for each data x cult combination +mv <- aggregate(ntot ~ data + cult, data = whiteFly, + FUN = function(x) c(mean = mean(x), var = var(x))) + +## Variance vs. mean on common axis limits; points above the dashed identity line indicate overdispersion +xlim <- ylim <- extendrange(c(mv$ntot), f = 0.05) +xy2 <- xyplot(ntot[, "var"] ~ ntot[, "mean"], + data = mv, + type = c("p", "r", "g"), + xlim = xlim, + ylim = ylim, + ylab = expression("Variância Amostral"~(s^2)), + xlab = expression("Média amostral"~(bar(y))), + par.settings = ps.sub, + panel = function(x, y, ...) { + panel.xyplot(x, y, ...) + panel.abline(a = 0, b = 1, lty = 2) + }) + +print(xy1, split = c(1, 1, 2, 1), more = TRUE) +print(xy2, split = c(2, 1, 2, 1), more = FALSE) + +fonte.xy("Fonte: Elaborado pelo autor.") + +@ + +\datatitle{Peixes Capturados por Pescadores em um Parque Estadual} + +<<data-fish, include=FALSE>>= + +data(fish, package = "tccPackage") +str(fish) + +@ + +Esse, diferentemente dos demais, é um estudo observacional feito por +biólogos que têm interesse em modelar o número de peixes capturados por +grupos de pescadores visitantes em um Parque Estadual \textbf{citar o + livro do J. Scott Long}. Nesse estudo tem-se como informações a +respeito dos grupos de visitantes o número de pessoas e de crianças no +grupo e se há ou não a presença de campista. Um fato interessante deste +dado é que nem todos os grupos de visitantes praticaram pescaria e +portanto o número de peixes capturados será zero. + +Na figura \ref{fig:descr-fish} também pode-se notar ser evidente a +presença excessiva de contagens zero. 
No gráfico à esquerda apresentamos +a disposição das contagens, transformadas por $\log(y_i + 0,5)$, e é +característica marcante no gráfico a grande quantidade de pontos +dispostos no primeiro valor do eixo $y$, \Sexpr{log(0.5)} = +$\log(0.5)$. Embora seja um gráfico marginal, não considerando as +covariáveis de cada contagem, à direita temos um histograma da variável +resposta onde percebe-se novamente a grande quantidade de valores nulos, +ao todo \Sexpr{round(100 * mean(fish$npeixes == 0), 1)}\% dos +dados são contagens nulas. Portanto nesse problema claramente modelos +alternativos que acomodem excesso de zeros se fazem necessários. + +<<descr-fish, fig.height=3.5, fig.width=7.5, fig.cap="(Esquerda) Logaritmo neperiano do Número de peixes capturados acrescido de 0,5 para as diferentes formulações dos grupos. (Direita) Histograma da variável de contagem número de peixes capturados por grupo.">>= + +xy1 <- xyplot(log(npeixes+0.5) ~ npessoas | campista, + groups = ncriancas, data = fish, + jitter.x = TRUE, + jitter.y = TRUE, + type = c("p", "g", "smooth"), + xlab = "Número de pessoas no grupo", + ylab = expression(log(~"Número de peixes"~+0.5)), + auto.key = list( + columns = 2, cex.title = 1, + lines = TRUE, points = FALSE, + title = "Número de crianças"), + strip = strip.custom( + strip.names = TRUE, var.name = "campista" + )) + +suppressWarnings( + xy2 <- histogram(~npeixes, data = fish, nint = 50, + xlab = "Número de peixes capturados", + ylab = "Percentual", + grid = TRUE, + panel = function(x, ...) { + panel.histogram(x, ...) 
+ panel.rug(x) + }) +) + +suppressWarnings({ + print(xy1, split = c(1, 1, 2, 1), more = TRUE) + print(xy2, split = c(2, 1, 2, 1), more = FALSE) +}) + +@ + +\subsection{Recursos computacionais} +\label{cap03:materiais-recursos} + +O \textit{software} R, versão \Sexpr{with(R.version, paste0(major, ".", + minor))}, é utilizado tanto para a preparação e apresentação dos dados +quanto para ajuste dos modelos e apresentação de resultados. Pacotes +auxiliares utilizados no trabalho são: \texttt{MASS} (versão +\Sexpr{packageVersion("MASS")}) para ajuste e inferências dos modelos +Binomial Negativo, \texttt{bbmle} (versão +\Sexpr{packageVersion("bbmle")}) para estimação via máxima +verossimilhança das funções implementadas para o modelo COM-Poisson, +\texttt{pscl} (versão \Sexpr{packageVersion("pscl")}) para ajuste dos +modelos Poisson e Binomial Negativo com componente de barreira para +modelagem de excesso de zeros e \texttt{lme4} (versão +\Sexpr{packageVersion("lme4")}) para ajuste dos modelos Poisson com +efeitos aleatórios normais. 
+ + \section{Métodos} \label{cap03:metodos} diff --git a/docs/compois.bib b/docs/compois.bib index 81b2eb5d3301ead591ccf371896af2a89ac6689e..245dafeaad1b34896f7b5f12b9dea1d6314b12d9 100644 --- a/docs/compois.bib +++ b/docs/compois.bib @@ -12,49 +12,37 @@ url = {http://lme4.r-forge.r-project.org/lMMwR/lrgprt.pdf}, volume = {67}, year = {2015} } -@article{Lambert1992, -author = {Lambert, Diane}, -doi = {10.2307/1269547}, -file = {:home/eduardo/Documents/Mendeley Desktop/lambert1992.pdf:pdf}, -issn = {00401706}, -journal = {Technometrics}, +@phdthesis{Borges2012, +author = {Borges, Patrick}, +file = {:home/eduardo/Documents/Mendeley Desktop/4552.pdf:pdf}, mendeley-groups = {TCC{\_}UFPR{\_}2015}, -month = {feb}, -number = {1}, -pages = {1}, -title = {{Zero-Inflated Poisson Regression, with an Application to Defects in Manufacturing}}, -url = {http://www.jstor.org/stable/1269547?origin=crossref}, -volume = {34}, -year = {1992} +school = {Universidade Federal de S{\~{a}}o Carlos}, +title = {{Novos modelos de sobreviv{\^{e}}ncia com fra{\c{c}}{\~{a}}o de cura baseados no processo da carcinog{\^{e}}nese}}, +year = {2012} } -@article{Sellers2016, -abstract = {Excess zeroes are often thought of as a cause of data over-dispersion (i.e. when the variance exceeds the mean); this claim is not entirely accurate. In actuality, excess zeroes reduce the mean of a dataset, thus inflating the dispersion index (i.e. the variance divided by the mean). While this results in an increased chance for data over-dispersion, the implication is not guaranteed. Thus, one should consider a flexible distribution that not only can account for excess zeroes, but can also address potential over- or under-dispersion. A zero-inflated Conway-Maxwell-Poisson (ZICMP) regression allows for modeling the relationship between explanatory and response variables, while capturing the effects due to excess zeroes and dispersion. 
This work derives the ZICMP model and illustrates its flexibility, extrapolates the corresponding likelihood ratio test for the presence of significant data dispersion, and highlights various statistical properties and model fit through several examples.}, -author = {Sellers, Kimberly F. and Raim, Andrew}, -doi = {10.1016/j.csda.2016.01.007}, -file = {:home/eduardo/Documents/Mendeley Desktop/compoissonzeroinflated.pdf:pdf}, -issn = {01679473}, -journal = {Computational Statistics {\&} Data Analysis}, -keywords = {Conway-Maxwell-Poisson,Excess zeroes,Over-dispersion,Under-dispersion}, +@article{Conway1962, +author = {Conway, Richard W and Maxwell, William L}, +journal = {Journal of Industrial Engineering}, mendeley-groups = {TCC{\_}UFPR{\_}2015}, -month = {jul}, -pages = {68--80}, -publisher = {Elsevier B.V.}, -title = {{A flexible zero-inflated model to address data dispersion}}, -url = {http://dx.doi.org/10.1016/j.csda.2016.01.007 http://linkinghub.elsevier.com/retrieve/pii/S0167947316000165}, -volume = {99}, -year = {2016} +pages = {132--136}, +title = {{A queuing model with state dependent service rates}}, +volume = {12}, +year = {1962} } -@article{Ridout1998, -abstract = {We consider the problem of modelling count data with excess zeros and review some possible models. Aspects of model tting and inference are considered. An example from horticultural research is used for illustration.}, -author = {Ridout, Martin and Demetrio, Clarice G.B and Hinde, John}, -file = {:home/eduardo/Documents/Mendeley Desktop/ibc{\_}fin.pdf:pdf}, -journal = {International Biometric Conference}, -keywords = {count data,em algorithm,hurdle models,negative binomial,poisson,zero inflation}, +@book{Hilbe2014, +abstract = {This entry-level text offers clear and concise guidelines on how to select, construct, interpret and evaluate count data. 
Written for researchers with little or no background in advanced statistics, the book presents treatments of all major models using numerous tables, insets, and detailed modeling suggestions. It begins by demonstrating the fundamentals of linear regression and works up to an analysis of the Poisson and negative binomial models, and to the problem of overdispersion. Examples in Stata, R, and SAS code enable readers to adapt models for their own purposes, making the text an ideal resource for researchers working in public health, ecology, econometrics, transportation, and other related fields.}, +author = {Hilbe, Joseph M.}, +booktitle = {Statistical Science}, +doi = {10.1017/CBO9781139236065}, +file = {:home/eduardo/Documents/Mendeley Desktop/Hilbe - 2014 - Modeling Count Data.pdf:pdf}, +isbn = {ISBN 978-1-107-02833-3}, +issn = {1467-9280}, mendeley-groups = {TCC{\_}UFPR{\_}2015}, -number = {December}, -pages = {1--13}, -title = {{Models for count data with many zeros}}, -year = {1998} +pages = {300}, +pmid = {25052830}, +title = {{Modeling Count Data}}, +volume = {25}, +year = {2014} } @article{King1989, abstract = {This paper discusses the problem of variance specification in models for event count data. Event counts are dependent variables that can take on only nonnegative integer values, such as the number of wars or coups d'etat in a year. I discuss several generalizations of the Poisson regression model, presented in King (1988), to allow for substantively interesting stochastic processes that do not fit into the Poisson framework. Individual models that cope with, and help analyze, heterogeneity, contagion, and negative contagion are each shown to lead to specific statistical models for event count data. In addition, I derive a new generalized event count (GEC) model that enables researchers to extract significant amounts of new information from existing data by estimating features of these unobserved substantive processes. 
Applications of this model to congressional challenges of presidential vetoes and superpower conflict demonstrate the dramatic advantages of this approach.}, @@ -83,30 +71,89 @@ title = {{Over- and Underdisperson Models}}, url = {https://lmb.univ-fcomte.fr/IMG/pdf/ch30{\_}kokonendji2014.pdf}, year = {2014} } -@phdthesis{Borges2012, -author = {Borges, Patrick}, -file = {:home/eduardo/Documents/Mendeley Desktop/4552.pdf:pdf}, +@article{Lambert1992, +author = {Lambert, Diane}, +doi = {10.2307/1269547}, +file = {:home/eduardo/Documents/Mendeley Desktop/lambert1992.pdf:pdf}, +issn = {00401706}, +journal = {Technometrics}, mendeley-groups = {TCC{\_}UFPR{\_}2015}, -school = {Universidade Federal de S{\~{a}}o Carlos}, -title = {{Novos modelos de sobreviv{\^{e}}ncia com fra{\c{c}}{\~{a}}o de cura baseados no processo da carcinog{\^{e}}nese}}, -year = {2012} +month = {feb}, +number = {1}, +pages = {1}, +title = {{Zero-Inflated Poisson Regression, with an Application to Defects in Manufacturing}}, +url = {http://www.jstor.org/stable/1269547?origin=crossref}, +volume = {34}, +year = {1992} } -@misc{Winkelmann1994, -abstract = {"This paper deals with the estimation of single equation models in which the counts are regressed on a set of observed individual characteristics such as age, gender, or nationality.... We propose a generalized event count model to simultaneously allow for a wide class of count data models and account for over- and underdispersion. This model is successfully applied to German data on fertility, divorces and mobility." 
(SUMMARY IN FRE)}, -author = {Winkelmann, R and Zimmermann, K F}, -booktitle = {Mathematical population studies}, -doi = {10.1080/08898489409525374}, -file = {:home/eduardo/Documents/Mendeley Desktop/41{\_}CountDataModel{\_}MathematicalPopulationStudies{\_}1993.pdf:pdf}, -isbn = {9780470510247}, -issn = {0889-8480}, -keywords = {Demographic Factors,Developed Countries,Divorce,Estimation Technics,Europe,Fertility,Germany,Mathematical Model,Migration,Models,Nuptiality,Population,Population Dynamics,Research Methodology,Theoretical,Western Europe}, +@article{Lord2010, +abstract = {The objective of this article is to evaluate the performance of the COM-Poisson GLM for analyzing crash data exhibiting underdispersion (when conditional on the mean). The COM-Poisson distribution, originally developed in 1962, has recently been reintroduced by statisticians for analyzing count data subjected to either over- or underdispersion. Over the last year, the COM-Poisson GLM has been evaluated in the context of crash data analysis and it has been shown that the model performs as well as the Poisson-gamma model for crash data exhibiting overdispersion. To accomplish the objective of this study, several COM-Poisson models were estimated using crash data collected at 162 railway-highway crossings in South Korea between 1998 and 2002. This data set has been shown to exhibit underdispersion when models linking crash data to various explanatory variables are estimated. The modeling results were compared to those produced from the Poisson and gamma probability models documented in a previous published study. The results of this research show that the COM-Poisson GLM can handle crash data when the modeling output shows signs of underdispersion. 
Finally, they also show that the model proposed in this study provides better statistical performance than the gamma probability and the traditional Poisson models, at least for this data set.}, +author = {Lord, Dominique and Geedipally, Srinivas Reddy and Guikema, Seth D.}, +doi = {10.1111/j.1539-6924.2010.01417.x}, +file = {:home/eduardo/Documents/Mendeley Desktop/Lord, Geedipally, Guikema - 2010 - Extension of the application of conway-maxwell-poisson models Analyzing traffic crash data exhibiting.pdf:pdf}, +isbn = {1539-6924 (Electronic) 0272-4332 (Linking)}, +issn = {02724332}, +journal = {Risk Analysis}, +keywords = {Com-poisson,Conway-Maxwell-Poisson,gamma models,negative binomial models,regression models,underdispersion}, mendeley-groups = {TCC{\_}UFPR{\_}2015}, -number = {3}, -pages = {205--221, 223}, -pmid = {12287090}, -title = {{Count data models for demographic data}}, -volume = {4}, -year = {1994} +mendeley-tags = {Com-poisson}, +number = {8}, +pages = {1268--1276}, +pmid = {20412518}, +title = {{Extension of the application of conway-maxwell-poisson models: Analyzing traffic crash data exhibiting underdispersion}}, +volume = {30}, +year = {2010} +} +@article{Nelder1972, +author = {Nelder, John Ashworth and Wedderburn, Robert William Maclagan}, +file = {:home/eduardo/Documents/Mendeley Desktop/Nelder, Wedderburn - 1972 - Generalized Linear Models.pdf:pdf}, +journal = {Journal of the Royal Statistical Society. Series A (General)}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +pages = {370--384}, +title = {{Generalized Linear Models}}, +volume = {135}, +year = {1972} +} +@article{Park2009, +abstract = {Developing sound or reliable statistical models for analyzing motor vehicle crashes is very important in highway safety studies. However, a significant difficulty associated with the model development is related to the fact that crash data often exhibit over-dispersion. 
Sources of dispersion can be varied and are usually unknown to the transportation analysts. These sources could potentially affect the development of negative binomial (NB) regression models, which are often the model of choice in highway safety. To help in this endeavor, this paper documents an alternative formulation that could be used for capturing heterogeneity in crash count models through the use of finite mixture regression models. The finite mixtures of Poisson or NB regression models are especially useful where count data were drawn from heterogeneous populations. These models can help determine sub-populations or groups in the data among others. To evaluate these models, Poisson and NB mixture models were estimated using data collected in Toronto, Ontario. These models were compared to standard NB regression model estimated using the same data. The results of this study show that the dataset seemed to be generated from two distinct sub-populations, each having different regression coefficients and degrees of over-dispersion. Although over-dispersion in crash data can be dealt with in a variety of ways, the mixture model can help provide the nature of the over-dispersion in the data. 
It is therefore recommended that transportation safety analysts use this type of model before the traditional NB model, especially when the data are suspected to belong to different groups.}, +author = {Park, Byung-Jung and Lord, Dominique}, +doi = {10.1016/j.aap.2009.03.007}, +file = {:home/eduardo/Documents/Mendeley Desktop/Park, Lord - 2009 - Application of finite mixture models for vehicle crash data analysis.pdf:pdf;:home/eduardo/Documents/Mendeley Desktop/Park, Lord - 2009 - Application of finite mixture models for vehicle crash data analysis(2).pdf:pdf}, +issn = {1879-2057}, +journal = {Accident; analysis and prevention}, +keywords = {Com-poisson}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +mendeley-tags = {Com-poisson}, +number = {4}, +pages = {683--691}, +pmid = {19540956}, +title = {{Application of finite mixture models for vehicle crash data analysis.}}, +volume = {41}, +year = {2009} +} +@book{Paula2013, +abstract = {A {\'{a}}rea de modelagem estat{\'{i}}stica de regress{\~{a}}o recebeu um grande impulso desde a cria{\c{c}}{\~{a}}o dos modelos lineares generalizados (MLGs) no in{\'{i}}cio da d{\'{e}}- cada de 70. O crescente interesse pela {\'{a}}rea motivou a realiza{\c{c}}{\~{a}}o de v{\'{a}}rios encontros informais no in{\'{i}}cio dos anos 80, a maioria deles na Inglaterra, at{\'{e}} que em 1986 foi realizado na cidade de Innsbruck na {\'{A}}ustria o “1st Internati- onalWorkshop on Statistical Modelling”(1st IWSM). Esse encontro tem sido realizado anualmente sendo que o {\'{u}}ltimo (25th IWSM) aconteceu em julho de 2010 na Universidade de Glasgow, Esc{\'{o}}cia. O 26th IWSM ser{\'{a}} realizado em julho de 2011 em Val{\^{e}}ncia, Espanha. No Brasil a {\'{a}}rea come{\c{c}}ou efetiva- mente a se desenvolver a partir de meados da d{\'{e}}cada de 80 e em particular ap{\'{o}}s a 1a Escola de Modelos de Regress{\~{a}}o (1EMR) realizada na Universi- dade de S{\~{a}}o Paulo em 1989. 
As demais escolas ocorreram desde ent{\~{a}}o a cada dois anos sendo que a {\'{u}}ltima (11EMR) foi realizada em mar{\c{c}}o de 2009 na cidade de Recife, PE. A 12EMR ser{\'{a}} realizada em mar{\c{c}}o de 2011 na cidade de Fortaleza, CE.}, +author = {Paula, Gilberto Alvarenga}, +file = {:home/eduardo/Documents/Mendeley Desktop/Paula - 2013 - Modelos de regress{\~{a}}o com apoio computacional.pdf:pdf}, +keywords = {GLM,Regress{\~{a}}o}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +mendeley-tags = {GLM,Regress{\~{a}}o}, +publisher = {IME-USP S{\~{a}}o Paulo}, +title = {{Modelos de regress{\~{a}}o com apoio computacional}}, +url = {https://www.ime.usp.br/{~}giapaula/textoregressao.htm}, +year = {2013} +} +@inproceedings{RibeiroJr2012, +author = {{Ribeiro Jr}, Paulo Justiniano and Bonat, Wagner Hugo and Krainski, Elias Teixeira and Zeviani, Walmes Marques}, +booktitle = {20{\textordmasculine} Simp{\'{o}}sio Nacional de Probabilidade e Estat{\'{i}}stica}, +file = {:home/eduardo/Documents/Mendeley Desktop/Ribeiro Jr et al. - 2012 - M{\'{e}}todos computacionais para infer{\^{e}}ncia com aplica{\c{c}}{\~{o}}es em R.pdf:pdf}, +keywords = {Infer{\^{e}}ncia,M{\'{e}}todos Computacionais,Verossimilhan{\c{c}}a}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +mendeley-tags = {Infer{\^{e}}ncia,M{\'{e}}todos Computacionais,Verossimilhan{\c{c}}a}, +pages = {282}, +title = {{M{\'{e}}todos computacionais para infer{\^{e}}ncia com aplica{\c{c}}{\~{o}}es em R}}, +url = {http://leg.ufpr.br/doku.php/cursos:mcie}, +year = {2012} } @phdthesis{Ribeiro2012, author = {Ribeiro, Ang{\'{e}}lica Maria Tortola}, @@ -116,6 +163,87 @@ school = {Universidade Federal de S{\~{a}}o Carlos}, title = {{Distribui{\c{c}}{\~{a}}o COM-Poisson na an{\'{a}}lise de dados de experimentos de quimiopreven{\c{c}}{\~{a}}o do c{\^{a}}ncer em animais}}, year = {2012} } +@article{Ridout1998, +abstract = {We consider the problem of modelling count data with excess zeros and review some possible models. 
Aspects of model fitting and inference are considered. An example from horticultural research is used for illustration.}, +author = {Ridout, Martin and Demetrio, Clarice G.B and Hinde, John}, +file = {:home/eduardo/Documents/Mendeley Desktop/ibc{\_}fin.pdf:pdf}, +journal = {International Biometric Conference}, +keywords = {count data,em algorithm,hurdle models,negative binomial,poisson,zero inflation}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +number = {December}, +pages = {1--13}, +title = {{Models for count data with many zeros}}, +year = {1998} +} +@article{Sellers2016, +abstract = {Excess zeroes are often thought of as a cause of data over-dispersion (i.e. when the variance exceeds the mean); this claim is not entirely accurate. In actuality, excess zeroes reduce the mean of a dataset, thus inflating the dispersion index (i.e. the variance divided by the mean). While this results in an increased chance for data over-dispersion, the implication is not guaranteed. Thus, one should consider a flexible distribution that not only can account for excess zeroes, but can also address potential over- or under-dispersion. A zero-inflated Conway-Maxwell-Poisson (ZICMP) regression allows for modeling the relationship between explanatory and response variables, while capturing the effects due to excess zeroes and dispersion. This work derives the ZICMP model and illustrates its flexibility, extrapolates the corresponding likelihood ratio test for the presence of significant data dispersion, and highlights various statistical properties and model fit through several examples.}, +author = {Sellers, Kimberly F. 
and Raim, Andrew}, +doi = {10.1016/j.csda.2016.01.007}, +file = {:home/eduardo/Documents/Mendeley Desktop/compoissonzeroinflated.pdf:pdf}, +issn = {01679473}, +journal = {Computational Statistics {\&} Data Analysis}, +keywords = {Conway-Maxwell-Poisson,Excess zeroes,Over-dispersion,Under-dispersion}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +month = {jul}, +pages = {68--80}, +publisher = {Elsevier B.V.}, +title = {{A flexible zero-inflated model to address data dispersion}}, +url = {http://dx.doi.org/10.1016/j.csda.2016.01.007 http://linkinghub.elsevier.com/retrieve/pii/S0167947316000165}, +volume = {99}, +year = {2016} +} +@article{Sellers2010, +abstract = {Poisson regression is a popular tool for modeling count data and is applied in a vast array of applications from the social to the physical sciences and beyond. Real data, however, are often over- or under-dispersed and, thus, not conducive to Poisson regression. We propose a regression model based on the Conway--Maxwell-Poisson (COM-Poisson) distribution to address this problem. The COM-Poisson regression generalizes the well-known Poisson and logistic regression models, and is suitable for fitting count data with a wide range of dispersion levels. With a GLM approach that takes advantage of exponential family properties, we discuss model estimation, inference, diagnostics, and interpretation, and present a test for determining the need for a COM-Poisson regression over a standard Poisson regression. We compare the COM-Poisson to several alternatives and illustrate its advantages and usefulness using three data sets with varying dispersion.}, +annote = {Refer{\^{e}}ncia para COMPoissonReg package}, +archivePrefix = {arXiv}, +arxivId = {1011.2077}, +author = {Sellers, Kimberly F. 
and Shmueli, Galit}, +doi = {10.1214/09-AOAS306}, +eprint = {1011.2077}, +file = {:home/eduardo/Documents/Mendeley Desktop/Sellers, Shmueli - 2010 - A flexible regression model for count data.pdf:pdf;:home/eduardo/Documents/Mendeley Desktop/Sellers, Shmueli - 2010 - A flexible regression model for count data(2).pdf:pdf}, +issn = {19326157}, +journal = {Annals of Applied Statistics}, +keywords = {Com-poisson,Conway-Maxwell-Poisson (COM-Poisson) distribution,Dispersion,Generalized Poisson,Generalized linear models (GLM)}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +mendeley-tags = {Com-poisson}, +number = {2}, +pages = {943--961}, +title = {{A flexible regression model for count data}}, +volume = {4}, +year = {2010} +} +@article{Shmueli2005, +abstract = {A useful discrete distribution (the Conway--Maxwell--Poisson distribution) is revived and its statistical and probabilistic properties are introduced and explored. This distribution is a two-parameter extension of the Poisson distribution that generalizes some well-known discrete distributions (Poisson, Bernoulli and geometric). It also leads to the generalization of distributions derived from these discrete distributions (i.e. the binomial and negative binomial distributions). We describe three methods for estimating the parameters of the Conway--Maxwell--Poisson distribution. The first is a fast simple weighted least squares method, which leads to estimates that are sufficiently accurate for practical purposes. The second method, using maximum likelihood, can be used to refine the initial estimates. This method requires iterations and is more computationally intensive. The third estimation method is Bayesian. Using the conjugate prior, the posterior density of the parameters of the Conway--Maxwell--Poisson distribution is easily computed. It is a flexible distribution that can account for overdispersion or underdispersion that is commonly encountered in count data. 
We also explore two sets of real world data demonstrating the flexibility and elegance of the Conway--Maxwell--Poisson distribution in fitting count data which do not seem to follow the Poisson distribution.}, +annote = {Refer{\^{e}}ncia para compoisson package}, +author = {Shmueli, Galit and Minka, Thomas P. and Kadane, Joseph B. and Borle, Sharad and Boatwright, Peter}, +doi = {10.1111/j.1467-9876.2005.00474.x}, +file = {:home/eduardo/Documents/Mendeley Desktop/Shmueli et al. - 2005 - A useful distribution for fitting discrete data Revival of the Conway-Maxwell-Poisson distribution.pdf:pdf}, +isbn = {1467-9876}, +issn = {00359254}, +journal = {Journal of the Royal Statistical Society. Series C: Applied Statistics}, +keywords = {Com-poisson,Conjugate family,Conway-Maxwell-Poisson distribution,Estimation,Exponential family,Overdispersion,Underdispersion}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +mendeley-tags = {Com-poisson}, +number = {1}, +pages = {127--142}, +title = {{A useful distribution for fitting discrete data: Revival of the Conway-Maxwell-Poisson distribution}}, +volume = {54}, +year = {2005} +} +@article{Silva2012, +author = {Silva, A. M. and Degrande, P. E. and Suekane, R. and Fernandes, M. G. and Zeviani, W. M.}, +file = {:home/eduardo/Documents/Mendeley Desktop/v35n1a16.pdf:pdf}, +issn = {0871-018X}, +journal = {Revista de Ci{\^{e}}ncias Agr{\'{a}}rias}, +keywords = {bolls,cotton,crop growth stage,crop yield,defoliation,leaf area,phenology,productivity,yield losses}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +number = {1}, +pages = {163--172}, +title = {{Impacto de diferentes n{\'{i}}veis de desfolha artificial nos est{\'{a}}dios fenol{\'{o}}gicos do algodoeiro}}, +url = {http://www.cabdirect.org/abstracts/20123299470.html;jsessionid=CF06663390A3A4463413D3018ECAACD6}, +volume = {35}, +year = {2012} +} @article{Wedderburn1974, author = {Wedderburn, R. W. 
M.}, doi = {10.2307/2334725}, @@ -131,6 +259,23 @@ url = {http://www.jstor.org/stable/2334725?origin=crossref}, volume = {61}, year = {1974} } +@misc{Winkelmann1994, +abstract = {"This paper deals with the estimation of single equation models in which the counts are regressed on a set of observed individual characteristics such as age, gender, or nationality.... We propose a generalized event count model to simultaneously allow for a wide class of count data models and account for over- and underdispersion. This model is successfully applied to German data on fertility, divorces and mobility." (SUMMARY IN FRE)}, +author = {Winkelmann, R and Zimmermann, K F}, +booktitle = {Mathematical population studies}, +doi = {10.1080/08898489409525374}, +file = {:home/eduardo/Documents/Mendeley Desktop/41{\_}CountDataModel{\_}MathematicalPopulationStudies{\_}1993.pdf:pdf}, +isbn = {9780470510247}, +issn = {0889-8480}, +keywords = {Demographic Factors,Developed Countries,Divorce,Estimation Technics,Europe,Fertility,Germany,Mathematical Model,Migration,Models,Nuptiality,Population,Population Dynamics,Research Methodology,Theoretical,Western Europe}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +number = {3}, +pages = {205--221, 223}, +pmid = {12287090}, +title = {{Count data models for demographic data}}, +volume = {4}, +year = {1994} +} @article{Winkelmann1995, author = {Winkelmann, Rainer}, doi = {10.1080/07350015.1995.10524620}, @@ -146,6 +291,20 @@ url = {http://www.tandfonline.com/doi/abs/10.1080/07350015.1995.10524620}, volume = {13}, year = {1995} } +@book{Winkelmann2008, +address = {Berlin, Heidelberg}, +author = {Winkelmann, Rainer}, +booktitle = {Vasa}, +doi = {10.1007/978-3-540-78389-3}, +file = {:home/eduardo/Documents/Mendeley Desktop/Winkelmann - 2008 - Econometric Analysis of Count Data.pdf:pdf}, +isbn = {978-3-540-77648-2}, +mendeley-groups = {TCC{\_}UFPR{\_}2015}, +pages = {342}, +publisher = {Springer Berlin Heidelberg}, +title = {{Econometric Analysis of Count 
Data}}, +url = {http://medcontent.metapress.com/index/A65RM03P4874243N.pdf http://link.springer.com/10.1007/978-3-540-78389-3}, +year = {2008} +} @article{Zeileis2007, abstract = {To offer a practical demonstration of regression models recommended for count outcomes using longitudinal predictors of children's medically attended injuries.}, author = {Zeileis, Achim and Kleiber, Christian and Jackman, Simon}, @@ -164,83 +323,6 @@ url = {http://www.ncbi.nlm.nih.gov/pubmed/21518631}, volume = {27}, year = {2007} } -@book{Hilbe2014, -abstract = {This entry-level text offers clear and concise guidelines on how to select, construct, interpret and evaluate count data. Written for researchers with little or no background in advanced statistics, the book presents treatments of all major models using numerous tables, insets, and detailed modeling suggestions. It begins by demonstrating the fundamentals of linear regression and works up to an analysis of the Poisson and negative binomial models, and to the problem of overdispersion. 
Examples in Stata, R, and SAS code enable readers to adapt models for their own purposes, making the text an ideal resource for researchers working in public health, ecology, econometrics, transportation, and other related fields.}, -author = {Hilbe, Joseph M.}, -booktitle = {Statistical Science}, -doi = {10.1017/CBO9781139236065}, -file = {:home/eduardo/Documents/Mendeley Desktop/Hilbe - 2014 - Modeling Count Data.pdf:pdf}, -isbn = {ISBN 978-1-107-02833-3}, -issn = {1467-9280}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -pages = {300}, -pmid = {25052830}, -title = {{Modeling Count Data}}, -volume = {25}, -year = {2014} -} -@book{Winkelmann2008, -address = {Berlin, Heidelberg}, -author = {Winkelmann, Rainer}, -booktitle = {Vasa}, -doi = {10.1007/978-3-540-78389-3}, -file = {:home/eduardo/Documents/Mendeley Desktop/Winkelmann - 2008 - Econometric Analysis of Count Data.pdf:pdf}, -isbn = {978-3-540-77648-2}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -pages = {342}, -publisher = {Springer Berlin Heidelberg}, -title = {{Econometric Analysis of Count Data}}, -url = {http://medcontent.metapress.com/index/A65RM03P4874243N.pdf http://link.springer.com/10.1007/978-3-540-78389-3}, -year = {2008} -} -@article{Nelder1972, -author = {Nelder, John Ashworth and Wedderburn, Robert William Maclagan}, -file = {:home/eduardo/Documents/Mendeley Desktop/Nelder, Wedderburn - 1972 - Generalized Linear Models.pdf:pdf}, -journal = {Journal of the Royal Statistical Society. 
Series A (General)}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -pages = {370--384}, -title = {{Generalized Linear Models}}, -volume = {135}, -year = {1972} -} -@article{Conway1962, -author = {Conway, Richard W and Maxwell, William L}, -journal = {Journal of Industrial Engineering}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -pages = {132----136}, -title = {{A queuing model with state dependent service rates}}, -volume = {12}, -year = {1962} -} -@inproceedings{RibeiroJr2012, -author = {{Ribeiro Jr}, Paulo Justiniano and Bonat, Wagner Hugo and Krainski, Elias Teixeira and Zeviani, Walmes Marques}, -booktitle = {20{\textordmasculine} Simp{\'{o}}sio Nacional de Probabilidade e Estat{\'{i}}stica}, -file = {:home/eduardo/Documents/Mendeley Desktop/Ribeiro Jr et al. - 2012 - M{\'{e}}todos computacionais para infer{\^{e}}ncia com aplica{\c{c}}{\~{o}}es em R.pdf:pdf}, -keywords = {Infer{\^{e}}ncia,M{\'{e}}todos Computacionais,Verossimilhan{\c{c}}a}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {Infer{\^{e}}ncia,M{\'{e}}todos Computacionais,Verossimilhan{\c{c}}a}, -pages = {282}, -title = {{M{\'{e}}todos computacionais para infer{\^{e}}ncia com aplica{\c{c}}{\~{o}}es em R}}, -url = {http://leg.ufpr.br/doku.php/cursos:mcie}, -year = {2012} -} -@article{Park2009, -abstract = {Developing sound or reliable statistical models for analyzing motor vehicle crashes is very important in highway safety studies. However, a significant difficulty associated with the model development is related to the fact that crash data often exhibit over-dispersion. Sources of dispersion can be varied and are usually unknown to the transportation analysts. These sources could potentially affect the development of negative binomial (NB) regression models, which are often the model of choice in highway safety. 
To help in this endeavor, this paper documents an alternative formulation that could be used for capturing heterogeneity in crash count models through the use of finite mixture regression models. The finite mixtures of Poisson or NB regression models are especially useful where count data were drawn from heterogeneous populations. These models can help determine sub-populations or groups in the data among others. To evaluate these models, Poisson and NB mixture models were estimated using data collected in Toronto, Ontario. These models were compared to standard NB regression model estimated using the same data. The results of this study show that the dataset seemed to be generated from two distinct sub-populations, each having different regression coefficients and degrees of over-dispersion. Although over-dispersion in crash data can be dealt with in a variety of ways, the mixture model can help provide the nature of the over-dispersion in the data. It is therefore recommended that transportation safety analysts use this type of model before the traditional NB model, especially when the data are suspected to belong to different groups.}, -author = {Park, Byung-Jung and Lord, Dominique}, -doi = {10.1016/j.aap.2009.03.007}, -file = {:home/eduardo/Documents/Mendeley Desktop/Park, Lord - 2009 - Application of finite mixture models for vehicle crash data analysis.pdf:pdf;:home/eduardo/Documents/Mendeley Desktop/Park, Lord - 2009 - Application of finite mixture models for vehicle crash data analysis(2).pdf:pdf}, -issn = {1879-2057}, -journal = {Accident; analysis and prevention}, -keywords = {Com-poisson}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {Com-poisson}, -number = {4}, -pages = {683--691}, -pmid = {19540956}, -title = {{Application of finite mixture models for vehicle crash data analysis.}}, -volume = {41}, -year = {2009} -} @article{Zeviani2014, abstract = {Event counts are response variables with non-negative integer values representing the 
number of times that an event occurs within a fixed domain such as a time interval, a geographical area or a cell of a contingency table. Analysis of counts by Gaussian regression models ignores the discreteness, asymmetry and heteroscedasticity and is inefficient, providing unrealistic standard errors or possibly negative predictions of the expected number of events. The Poisson regression is the standard model for count data with underlying assumptions on the generating process which may be implausible in many applications. Statisticians have long recognized the limitation of imposing equidispersion under the Poisson regression model. A typical situation is when the conditional variance exceeds the conditional mean, in which case models allowing for overdispersion are routinely used. Less reported is the case of underdispersion with fewer modeling alternatives and assessments available in the literature. One of such alternatives, the Gamma-count model, is adopted here in the analysis of an agronomic experiment designed to investigate the effect of levels of defoliation on different phenological states upon the number of cotton bolls. Data set and code for analysis are available as online supplements. Results show improvements over the Poisson model and the semi-parametric quasi-Poisson model in capturing the observed variability in the data. Estimating rather than assuming the underlying variance process leads to important insights into the process. Event counts are response variables with non-negative integer values representing the number of times that an event occurs within a fixed domain such as a time interval, a geographical area or a cell of a contingency table. Analysis of counts by Gaussian regression models ignores the discreteness, asymmetry and heteroscedasticity and is inefficient, providing unrealistic standard errors or possibly negative predictions of the expected number of events. 
The Poisson regression is the standard model for count data with underlying assumptions on the generating process which may be implausible in many applications. Statisticians have long recognized the limitation of imposing equidispersion under the Poisson regression model. A typical situation is when the conditional variance exceeds the conditional mean, in which case models allowing for overdispersion are routinely used. Less reported is the case of underdispersion with fewer modeling alternatives and assessments available in the literature. One of such alternatives, the Gamma-count model, is adopted here in the analysis of an agronomic experiment designed to investigate the effect of levels of defoliation on different phenological states upon the number of cotton bolls. Data set and code for analysis are available as online supplements. Results show improvements over the Poisson model and the semi-parametric quasi-Poisson model in capturing the observed variability in the data. Estimating rather than assuming the underlying variance process leads to important insights into the process.}, author = {Zeviani, Walmes Marques and {Ribeiro Jr}, Paulo Justiniano and Bonat, Wagner Hugo and Shimakura, Silvia Emiko and Muniz, Joel Augusto}, @@ -255,71 +337,3 @@ title = {{The Gamma-count distribution in the analysis of experimental underdisp url = {http://dx.doi.org/10.1080/02664763.2014.922168}, year = {2014} } -@article{Lord2010, -abstract = {The objective of this article is to evaluate the performance of the COM-Poisson GLM for analyzing crash data exhibiting underdispersion (when conditional on the mean). The COM-Poisson distribution, originally developed in 1962, has recently been reintroduced by statisticians for analyzing count data subjected to either over- or underdispersion. 
Over the last year, the COM-Poisson GLM has been evaluated in the context of crash data analysis and it has been shown that the model performs as well as the Poisson-gamma model for crash data exhibiting overdispersion. To accomplish the objective of this study, several COM-Poisson models were estimated using crash data collected at 162 railway-highway crossings in South Korea between 1998 and 2002. This data set has been shown to exhibit underdispersion when models linking crash data to various explanatory variables are estimated. The modeling results were compared to those produced from the Poisson and gamma probability models documented in a previous published study. The results of this research show that the COM-Poisson GLM can handle crash data when the modeling output shows signs of underdispersion. Finally, they also show that the model proposed in this study provides better statistical performance than the gamma probability and the traditional Poisson models, at least for this data set.}, -author = {Lord, Dominique and Geedipally, Srinivas Reddy and Guikema, Seth D.}, -doi = {10.1111/j.1539-6924.2010.01417.x}, -file = {:home/eduardo/Documents/Mendeley Desktop/Lord, Geedipally, Guikema - 2010 - Extension of the application of conway-maxwell-poisson models Analyzing traffic crash data exhibiting.pdf:pdf}, -isbn = {1539-6924 (Electronic) 0272-4332 (Linking)}, -issn = {02724332}, -journal = {Risk Analysis}, -keywords = {Com-poisson,Conway-Maxwell-Poisson,gamma models,negative binomial models,regression models,underdispersion}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {Com-poisson}, -number = {8}, -pages = {1268--1276}, -pmid = {20412518}, -title = {{Extension of the application of conway-maxwell-poisson models: Analyzing traffic crash data exhibiting underdispersion}}, -volume = {30}, -year = {2010} -} -@book{Paula2013, -abstract = {A {\'{a}}rea de modelagem estat{\'{i}}stica de regress{\~{a}}o recebeu um grande impulso desde a 
cria{\c{c}}{\~{a}}o dos modelos lineares generalizados (MLGs) no in{\'{i}}cio da d{\'{e}}- cada de 70. O crescente interesse pela {\'{a}}rea motivou a realiza{\c{c}}{\~{a}}o de v{\'{a}}rios encontros informais no in{\'{i}}cio dos anos 80, a maioria deles na Inglaterra, at{\'{e}} que em 1986 foi realizado na cidade de Innsbruck na {\'{A}}ustria o “1st Internati- onalWorkshop on Statistical Modelling”(1st IWSM). Esse encontro tem sido realizado anualmente sendo que o {\'{u}}ltimo (25th IWSM) aconteceu em julho de 2010 na Universidade de Glasgow, Esc{\'{o}}cia. O 26th IWSM ser{\'{a}} realizado em julho de 2011 em Val{\^{e}}ncia, Espanha. No Brasil a {\'{a}}rea come{\c{c}}ou efetiva- mente a se desenvolver a partir de meados da d{\'{e}}cada de 80 e em particular ap{\'{o}}s a 1a Escola de Modelos de Regress{\~{a}}o (1EMR) realizada na Universi- dade de S{\~{a}}o Paulo em 1989. As demais escolas ocorreram desde ent{\~{a}}o a cada dois anos sendo que a {\'{u}}ltima (11EMR) foi realizada em mar{\c{c}}o de 2009 na cidade de Recife, PE. A 12EMR ser{\'{a}} realizada em mar{\c{c}}o de 2011 na cidade de Fortaleza, CE.}, -author = {Paula, Gilberto Alvarenga}, -file = {:home/eduardo/Documents/Mendeley Desktop/Paula - 2013 - Modelos de regress{\~{a}}o com apoio computacional.pdf:pdf}, -keywords = {GLM,Regress{\~{a}}o}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {GLM,Regress{\~{a}}o}, -publisher = {IME-USP S{\~{a}}o Paulo}, -title = {{Modelos de regress{\~{a}}o com apoio computacional}}, -url = {https://www.ime.usp.br/{~}giapaula/textoregressao.htm}, -year = {2013} -} -@article{Shmueli2005, -abstract = {A useful discrete distribution (the Conway2013Maxwell2013Poisson distribution) is revived and its statistical and probabilistic properties are introduced and explored. This distribution is a two-parameter extension of the Poisson distribution that generalizes some well-known discrete distributions (Poisson, Bernoulli and geometric). 
It also leads to the generalization of distributions derived from these discrete distributions (i.e. the binomial and negative binomial distributions). We describe three methods for estimating the parameters of the Conway2013Maxwell2013Poisson distribution. The first is a fast simple weighted least squares method, which leads to estimates that are sufficiently accurate for practical purposes. The second method, using maximum likelihood, can be used to refine the initial estimates. This method requires iterations and is more computationally intensive. The third estimation method is Bayesian. Using the conjugate prior, the posterior density of the parameters of the Conway2013Maxwell2013Poisson distribution is easily computed. It is a flexible distribution that can account for overdispersion or underdispersion that is commonly encountered in count data. We also explore two sets of real world data demonstrating the flexibility and elegance of the Conway2013Maxwell2013Poisson distribution in fitting count data which do not seem to follow the Poisson distribution.}, -annote = {Refer{\^{e}}ncia para compoisson package}, -author = {Shmueli, Galit and Minka, Thomas P. and Kadane, Joseph B. and Borle, Sharad and Boatwright, Peter}, -doi = {10.1111/j.1467-9876.2005.00474.x}, -file = {:home/eduardo/Documents/Mendeley Desktop/Shmueli et al. - 2005 - A useful distribution for fitting discrete data Revival of the Conway-Maxwell-Poisson distribution.pdf:pdf}, -isbn = {1467-9876}, -issn = {00359254}, -journal = {Journal of the Royal Statistical Society. 
Series C: Applied Statistics}, -keywords = {Com-poisson,Conjugate family,Conway-Maxwell-Poisson distribution,Estimation,Exponential family,Overdispersion,Underdispersion}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {Com-poisson}, -number = {1}, -pages = {127--142}, -title = {{A useful distribution for fitting discrete data: Revival of the Conway-Maxwell-Poisson distribution}}, -volume = {54}, -year = {2005} -} -@article{Sellers2010, -abstract = {Poisson regression is a popular tool for modeling count data and is applied in a vast array of applications from the social to the physical sciences and beyond. Real data, however, are often over- or under-dispersed and, thus, not conducive to Poisson regression. We propose a regression model based on the Conway--Maxwell-Poisson (COM-Poisson) distribution to address this problem. The COM-Poisson regression generalizes the well-known Poisson and logistic regression models, and is suitable for fitting count data with a wide range of dispersion levels. With a GLM approach that takes advantage of exponential family properties, we discuss model estimation, inference, diagnostics, and interpretation, and present a test for determining the need for a COM-Poisson regression over a standard Poisson regression. We compare the COM-Poisson to several alternatives and illustrate its advantages and usefulness using three data sets with varying dispersion.}, -annote = {Refer{\^{e}}ncia para COMPoissonReg package}, -archivePrefix = {arXiv}, -arxivId = {1011.2077}, -author = {Sellers, Kimberly F. 
and Shmueli, Galit}, -doi = {10.1214/09-AOAS306}, -eprint = {1011.2077}, -file = {:home/eduardo/Documents/Mendeley Desktop/Sellers, Shmueli - 2010 - A flexible regression model for count data.pdf:pdf;:home/eduardo/Documents/Mendeley Desktop/Sellers, Shmueli - 2010 - A flexible regression model for count data(2).pdf:pdf}, -issn = {19326157}, -journal = {Annals of Applied Statistics}, -keywords = {Com-poisson,Conway-Maxwell-Poisson (COM-Poisson) distribution,Dispersion,Generalized Poisson,Generalized linear models (GLM)}, -mendeley-groups = {TCC{\_}UFPR{\_}2015}, -mendeley-tags = {Com-poisson}, -number = {2}, -pages = {943--961}, -title = {{A flexible regression model for count data}}, -volume = {4}, -year = {2010} -}