Skip to content
Snippets Groups Projects
Commit d7b21f7e authored by Fernando Mayer's avatar Fernando Mayer
Browse files

Merge branch 'issue#11' into 'devel'

Issue#11: Datasets of the chapter 8

This branch brings
  - Datasets of the chapter 8. So, 5 data frames are included with the corresponding `data-raw/*.R` and `data/*.RData` files and documentation on the `R/legTools.R`;
  - @brunaw is included as a collaborator in the DESCRIPTION;

See merge request !13
parents 666fc529 e4ffb417
No related branches found
No related tags found
No related merge requests found
Showing
with 877 additions and 109 deletions
Package: legTools
Title: Convenience Functions, Small GUI to Teach Statistics and Some Datasets
Version: 0.2.0
Authors@R: person("LEG", "Core Team", email = "leg@ufpr.br", role =
c("aut", "cre"))
Authors@R: as.person(c(
"LEG Core Team <leg@ufpr.br> [cre]",
"Walmes Marques Zeviani <walmes@ufpr.br> [aut]",
"Fernando de Pol Mayer <fernando.mayer@ufpr.br> [aut]",
"Bruna Davies Wundervald <brunadaviesw@gmail.com> [ctb]"
))
Description: legTools is a collection of R functions and datasets used for
academic purposes. These functions mainly include small GUI to teach
statistics, convenience functions to visualize data and datasets, most part
......
This diff is collapsed.
......@@ -3,6 +3,7 @@
## the package.
##======================================================================
setwd("~/GitLab/legTools/data-raw")
f <- list.files(path="../data", pattern="*.RData")
sapply(f,
......
##----------------------------------------------------------------------
## Data generation.
##
## Builds the `castorbeansYield` data frame (9 varieties x 5 locations,
## 45 rows), attaches the per-location mean square errors as the "MSE"
## attribute and saves the object to ../data/castorbeansYield.RData.

## Factorial layout: `variety` varies fastest, then `loc`.
castorbeansYield <- expand.grid(
    variety=c("V 38", "L 41", "L 168", "L 176", "L 178", "L 881",
              "L 882", "L 883", "L 1.000"),
    loc=c("Ribeirão Preto", "Pindorama", "Mococa", "Tietê",
          "Santa Rita"),
    KEEP.OUT.ATTRS=FALSE)

## Mean yields, typed in the row order of the grid above.
castorbeansYield$meanYield <-
    c(1186, 1219, 1005, 1264, 1272, 1151, 1246, 1223, 1168, 1460, 1598,
      1825, 1394, 1407, 1436, 1291, 1622, 1521, 1832, 1595, 1851, 1613,
      1747, 2297, 2233, 2391, 1992, 1644, 1422, 1458, 1567, 1532, 1532,
      1683, 1699, 1467, 2192, 2294, 1920, 1856, 2178, 2026, 2458, 2040,
      1963)

## Variety x location table with margins, to check the typed values
## against the totals.
addmargins(with(castorbeansYield,
                tapply(meanYield, list(variety, loc), FUN=sum)))

## Sort rows by location and then by variety.
castorbeansYield <- castorbeansYield[with(castorbeansYield,
                                          order(loc, variety)),]

## Put MSE as an attribute to the data.frame. One value per location,
## named after the levels of `loc`. This is done after the reordering
## above because row subsetting of a data.frame does not carry custom
## attributes over.
mse <- c(29930, 69170, 88210, 35720, 64520)
names(mse) <- levels(castorbeansYield$loc)
attr(castorbeansYield, which="MSE") <- mse

str(castorbeansYield)
save(castorbeansYield, file="../data/castorbeansYield.RData")

##----------------------------------------------------------------------
## Examples.

## library() stops with an error when lattice is missing; require()
## would only return FALSE and let xyplot() fail later.
library(lattice)

data(castorbeansYield)
str(castorbeansYield)

## The dataset documentation reports yield in kg/ha, so the axis is
## labelled accordingly.
xyplot(meanYield~variety, data=castorbeansYield,
       groups=loc, type="o",
       ylab=expression(Yield~(kg~ha^{-1})),
       xlab="Variety")

## Check the saved file: load it into a scratch environment (instead of
## wiping the whole workspace) and list what it contains.
chk <- new.env()
load("../data/castorbeansYield.RData", envir=chk)
ls(chk)
str(chk$castorbeansYield)
##----------------------------------------------------------------------
## Data generation.
##
## Builds the `cottonFert` data frame (5 treatments x 4 replications x
## 5 locations, 100 rows) and saves it to ../data/cottonFert.RData.

## Layout: `trt` varies fastest, then `rept`, then `loc`.
cottonFert <- expand.grid(
    ## K=c(-1, 1),
    ## N=c(-1, 1),
    trt=c("N1K1", "N1K2", "N2K1", "N2K2", "CTRL"),
    rept=1:4,
    loc=gl(5, 1),
    KEEP.OUT.ATTRS=FALSE)

## Response values, typed in the row order of the grid above.
## x <- scan()
## dput(x/10)
cottonFert$y <- c(4.2, 3.6, 3.2, 3.6, 2.4, 2.4, 2.2, 2.6, 2.8, 1.2, 2.8,
                  1.8, 3, 3, 3, 3.2, 3.2, 2, 2.4, 2.8, 11, 10, 12, 10.5,
                  8.5, 10.5, 9.5, 9, 11.5, 8, 9, 9, 9.5, 10.5, 10, 9, 8,
                  9.5, 10, 7, 7, 8.5, 9, 9, 7, 7.5, 7, 6, 9.5, 7, 6.5,
                  8, 6, 8, 4.5, 5.5, 5.5, 5.5, 7, 6, 8, 8, 7, 9, 3.5,
                  6.9, 5.5, 4.7, 6.5, 3, 6, 7.5, 5.5, 5.5, 3.9, 6.5, 8,
                  9, 7, 7, 1.72, 2.38, 2.52, 2.78, 1.48, 1.81, 2.56,
                  2.88, 3.01, 1.62, 1.73, 2.48, 2.76, 2.83, 1.58, 1.62,
                  2.43, 2.54, 2.79, 1.56)

## Check using the totals.
aggregate(y~loc, data=cottonFert, FUN=sum)

str(cottonFert)
save(cottonFert, file="../data/cottonFert.RData")

##----------------------------------------------------------------------
## Examples.

library(lattice)

data(cottonFert)
str(cottonFert)

## Raw response by treatment, one panel per location.
xyplot(y~trt|loc,
       data=cottonFert, type=c("p", "a"),
       ylab="y", xlab="Treatment")

## Same display on the log scale; the axis label names the plotted
## quantity.
xyplot(log(y)~trt|loc,
       data=cottonFert, type=c("p", "a"),
       ylab="log(y)", xlab="Treatment")

## One linear model per location, followed by their ANOVA tables.
m0 <- by(data=cottonFert, INDICES=cottonFert$loc,
         FUN=lm, formula=y~trt)
lapply(m0, anova)

## Joint model with the location x treatment interaction.
m1 <- lm(y~loc*trt, data=cottonFert)

## Diagnostic plots on a 2 x 2 grid, then back to a single panel.
par(mfrow=c(2,2))
plot(m1)
layout(1)

## Box-Cox profile for the joint model.
MASS::boxcox(m1)

## Joint model refitted with the log-transformed response.
m2 <- lm(log(y)~loc*trt, data=cottonFert)

par(mfrow=c(2,2))
plot(m2)
layout(1)

anova(m2)
##----------------------------------------------------------------------
## Data generation. Pimentel page 149.
##
## Builds the `peanutYield` data frame (4 varieties x 3 locations x
## 3 years, 36 rows) and saves it to ../data/peanutYield.RData.

## Layout: `variety` varies fastest, then `loc`, then `year`.
peanutYield <- expand.grid(
    variety=c("40-Roxo", "54-Roxo", "49-Cateto", "53-Tatu"),
    loc=c("Campinas", "Ribeirão Preto", "Pindorama"),
    year=c("1941-42", "1942-43", "1949-50"),
    KEEP.OUT.ATTRS=FALSE)

## Mean yields, typed in the row order of the grid above.
peanutYield$meanYield <-
    c(1780, 1450, 1430, 790, 690, 470, 520, 280, 4400, 4330, 3440, 3710,
      2610, 2590, 2710, 1590, 1570, 1330, 1500, 1170, 1850, 2010, 2240,
      1790, 2570, 2320, 2130, 2220, 2650, 2740, 1890, 1570, 2100, 2160,
      1570, 870)

## Variety x location table (summed over years) with margins, to check
## the typed values against the totals.
addmargins(with(peanutYield,
                tapply(meanYield, list(variety, loc), FUN=sum)))

## Sort rows by year, then location, then variety.
peanutYield <- peanutYield[with(peanutYield,
                                order(year, loc, variety)),]

str(peanutYield)

## The file is loaded back at the end of this script, so it has to be
## written here; otherwise the check fails on a fresh checkout or
## inspects a stale file.
save(peanutYield, file="../data/peanutYield.RData")

##----------------------------------------------------------------------
## Examples.

## library() stops with an error when lattice is missing; require()
## would only return FALSE and let xyplot() fail later.
library(lattice)

data(peanutYield)
str(peanutYield)

## NOTE(review): the axis label says t/ha; confirm the unit against the
## book, since the values look like they may be kg/ha.
xyplot(meanYield~variety|year, data=peanutYield,
       groups=loc, type="o",
       ylab=expression(Yield~(t~ha^{-1})),
       xlab="Variety")

## Check the saved file: load it into a scratch environment (instead of
## wiping the whole workspace) and list what it contains.
chk <- new.env()
load("../data/peanutYield.RData", envir=chk)
ls(chk)
str(chk$peanutYield)
##----------------------------------------------------------------------
## Data generation. Pimentel page 156.
##
## Builds the `peanutYield2` data frame (4 varieties x 4 experiments,
## 16 rows), attaches the per-experiment mean square errors as the "MSE"
## attribute and saves the object to ../data/peanutYield2.RData.

## Layout: `variety` varies fastest, then `loc` (location and season).
peanutYield2 <- expand.grid(
    variety=c("40-Roxo", "54-Roxo", "49-Cateto", "53-Tatu"),
    loc=c("Pindorama 49/50", "Ribeirão Preto 49/50",
          "Campinas 48/49", "Campinas 42/43"),
    KEEP.OUT.ATTRS=FALSE)

## Mean yields, typed in the row order of the grid above.
peanutYield2$meanYield <-
    c(2100, 2160, 1570, 870, 2650, 2740, 1890, 1570, 2100, 1830, 1890,
      1370, 2710, 2610, 2590, 1590)

## Variety x location table with margins, to check the typed values
## against the totals.
addmargins(with(peanutYield2,
                tapply(meanYield, list(variety, loc), FUN=sum)))

## Sort rows by location and then by variety.
peanutYield2 <- peanutYield2[with(peanutYield2,
                                  order(loc, variety)),]
str(peanutYield2)

## Put MSE as an attribute to the data.frame. One value per level of
## `loc`, in the order of the levels. This is done after the reordering
## above because row subsetting of a data.frame does not carry custom
## attributes over.
mse <- c(52900, 84700, 3970, 106900)
names(mse) <- levels(peanutYield2$loc)
attr(peanutYield2, which="MSE") <- mse

str(peanutYield2)
save(peanutYield2, file="../data/peanutYield2.RData")

##----------------------------------------------------------------------
## Examples.

## library() stops with an error when lattice is missing; require()
## would only return FALSE and let xyplot() fail later.
library(lattice)

data(peanutYield2)
str(peanutYield2)

## NOTE(review): the axis label says t/ha; confirm the unit against the
## book, since the values look like they may be kg/ha.
xyplot(meanYield~variety, data=peanutYield2,
       groups=loc, type="o",
       ylab=expression(Yield~(t~ha^{-1})),
       xlab="Variety")

## Check the saved file: load it into a scratch environment (instead of
## wiping the whole workspace) and list what it contains.
chk <- new.env()
load("../data/peanutYield2.RData", envir=chk)
ls(chk)
str(chk$peanutYield2)
##----------------------------------------------------------------------
## Data generation. Pimentel page 147.
##
## Builds the `potatoYield2` data frame (8 varieties x 7 locations,
## 56 rows), attaches the per-location mean square errors as the "MSE"
## attribute and saves the object to ../data/potatoYield2.RData.

## Layout: `variety` varies fastest, then `loc` (coded 1 to 7).
potatoYield2 <- expand.grid(
    variety=c("Kennebec", "B 25-50 E", "B 1-52", "Huinkul",
              "B 116-51", "B 72-53 A", "S. Rafaela", "Buena Vista"),
    loc=gl(7, 1),
    KEEP.OUT.ATTRS=FALSE)

## Yield totals, typed without the decimal mark and rescaled by 10, in
## the row order of the grid above.
potatoYield2$sumYield <- c(470, 483, 646, 822, 611, 694, 685, 477,
                           318, 650, 1201, 1205, 1223, 1112, 1176, 426,
                           428, 660, 891, 1002, 900, 912, 1018, 497,
                           584, 780, 928, 970, 954, 865, 703, 682,
                           364, 356, 386, 558, 546, 450, 558, 356,
                           482, 358, 439, 624, 523, 519, 488, 496,
                           492, 583, 940, 929, 928, 797, 929, 532)/10

## Variety x location table with margins, to check the typed values
## against the totals.
addmargins(with(potatoYield2,
                tapply(sumYield, list(variety, loc), FUN=sum)))

## Sort rows by location and then by variety.
potatoYield2 <- potatoYield2[with(potatoYield2, order(loc, variety)),]

## Put MSE as an attribute to the data.frame. Values are typed without
## the decimal mark and rescaled by 100; names are "loc:1", "loc:2", ...
## seq_along() is used instead of 1:length() as the safe sequence idiom.
mse <- c(315, 263, 855, 209, 325, 199, 535)/100
names(mse) <- paste0("loc:", seq_along(mse))
attr(potatoYield2, which="MSE") <- mse

str(potatoYield2)
save(potatoYield2, file="../data/potatoYield2.RData")

##----------------------------------------------------------------------
## Examples.

## library() stops with an error when lattice is missing; require()
## would only return FALSE and let xyplot() fail later.
library(lattice)

data(potatoYield2)
str(potatoYield2)

## NOTE(review): dividing the total by 4 presumably gives the mean over
## 4 plots per variety in each experiment -- confirm against the book.
xyplot(sumYield/4~variety, data=potatoYield2,
       groups=loc, type="o",
       ylab=expression(Yield~(t~ha^{-1})),
       xlab="Variety")

## Check the saved file: load it into a scratch environment (instead of
## wiping the whole workspace) and list what it contains.
chk <- new.env()
load("../data/potatoYield2.RData", envir=chk)
ls(chk)
str(chk$potatoYield2)
File added
File added
File added
File added
File added
......@@ -21,10 +21,13 @@ These data are from an experiment done by The Brazilian
(t/ha) was recorded in each experimental unit.
\itemize{
\item \code{block} a categorical unordered factor with 4 levels.
\item \code{variety} a categorical unordered factor with 6
levels.
\item \code{variety} a categorical unordered factor with 6 levels.
\item \code{yield} cassava yield (t/ha).
}
}
\examples{
......
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/legTools.R
\docType{data}
\name{castorbeansYield}
\alias{castorbeansYield}
\title{Castor beans variety competition experiments in some locations}
\format{a \code{data.frame} with 45 records and 3 variables.}
\source{
Pimentel Gomes, F. (2009). Curso de Estatística Experimental
(15th ed.). Piracicaba, São Paulo: FEALQ. (page 149)
Souza, O. Ferreira de.; Canecchio, F. V. (1952). Melhoramento de
mamoeira, VII. Bragantia 12:301-307.
}
\usage{
data(castorbeansYield)
}
\description{
These data are from a set of experiments evaluating
varieties of castor beans in terms of yield (kg/ha) for some
locations (counties).
\itemize{
\item \code{variety} a categorical unordered factor with 9 levels,
varieties and lines of castor beans.
\item \code{loc} a categorical unordered factor with 5 levels, the
locations (counties) experimental stations.
\item \code{meanYield} is the mean of yield for a variety in each
location. So, this is the mean across all plots of the same variety
in each experiment.
}
}
\details{
The data in the book are not complete because it doesn't report
individual plot values but only the mean for a variety in each single
experiment. Neither does it mention which experimental design was used
in each station. The book reports the Mean Square Error estimates for
each experiment. These values are provided as an attribute of the
object, \code{attr(castorbeansYield, "MSE")}, and they come from the
ANOVA table corresponding to an appropriate model for each
location. With these MSE it is possible to obtain a partial ANOVA
table to test the effect of location, variety and their interaction.
}
\examples{
require(lattice)
data(castorbeansYield)
str(castorbeansYield)
xyplot(meanYield~variety, data=castorbeansYield,
groups=loc, type="o",
ylab=expression(Yield~(t~ha^{-1})),
xlab="Variety")
}
\keyword{datasets}
......@@ -22,13 +22,19 @@ These data are from a \eqn{2^3} factorial experiment
branches in coffee trees.
\itemize{
\item \code{N} content of nitrogen in the fertilizer (low/high).
\item \code{P} content of phosphorus in the fertilizer (low/high).
\item \code{K} content of potassium in the fertilizer (low/high).
\item \code{block} an unordered factor representing the blocks
used.
\item \code{branches} an integer variable, the number of dry
branches in a coffee tree.
}
}
\details{
......
......@@ -18,11 +18,17 @@ These data are from an \eqn{2^3} factorial experiment
(K) on corn yield in a randomized block design.
\itemize{
\item \code{block} a factor with 4 levels.
\item \code{N} low (-1) and high (+1) levels of nitrogen.
\item \code{P} low (-1) and high (+1) levels of phosphorus.
\item \code{K} low (-1) and high (+1) levels of potassium.
\item \code{yield} corn yield (ton/ha).
}
}
\examples{
......
......@@ -22,11 +22,17 @@ These data are from an axial 3 factorial experiment
plus presence of limestone.
\itemize{
\item \code{N} content of nitrogen in the fertilizer.
\item \code{P} content of phosphorus in the fertilizer.
\item \code{K} content of potassium in the fertilizer.
\item \code{limestone} presence (1) or absence of limestone (0).
\item \code{acid} mean of corn yield in 16 locations (ton/ha).
}
}
\details{
......
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/legTools.R
\docType{data}
\name{cottonFert}
\alias{cottonFert}
\title{A set of experiments in different locations studying NK on
cotton}
\format{a \code{data.frame} with 100 records and 4 variables.}
\source{
Pimentel Gomes, F. (2009). Curso de Estatística Experimental
(15th ed.). Piracicaba, São Paulo: FEALQ. (page 142)
}
\usage{
data(cottonFert)
}
\description{
These data are from a set of experiments carried out in
different locations studying NK fertilization in cotton. All the 5
experiments are a complete randomized design with 4 replications
and 5 levels of fertilization based on N and K levels and a
control.
\itemize{
\item \code{trt} unordered factor, treatment that consist of 4 cells
from a 2^2 factorial design (\eqn{N\times K}) and a control.
\item \code{rept} integer, indexes experimental units.
\item \code{loc} an unordered factor representing the locations where
the experiment was carried out.
\item \code{y} numeric, the response variable of the experiment. The
text book didn't give details.
}
}
\examples{
library(lattice)
data(cottonFert)
str(cottonFert)
xyplot(y~trt|loc,
data=cottonFert, type=c("p", "a"),
ylab="y", xlab="Treatment")
xyplot(log(y)~trt|loc,
data=cottonFert, type=c("p", "a"),
ylab="y", xlab="Treatment")
}
\keyword{datasets}
......@@ -3,14 +3,14 @@
\docType{data}
\name{defoliation}
\alias{defoliation}
\title{Bolls in cotton as function of artifitial defoliation}
\title{Bolls in cotton as function of artificial defoliation}
\format{a \code{data.frame} with 125 records and 4 variables.}
\usage{
data(defoliation)
}
\description{
This dataset contains the result of a real experiment to
evaluate the effect of artifitial defoliation in combination with
evaluate the effect of artificial defoliation in combination with
phenological stage of occurrence on the production of cotton
represented by the number of bolls produced at the end of the
crop cycle. The experiment is a \eqn{5\times 5} factorial with 5
......@@ -21,15 +21,21 @@ This dataset contais the result of a real experiment to
variance less than the sample mean).
\itemize{
\item \code{phenol} a categorical ordered factor with 5 levels
that represent the phenological stages of the cotton plant in
which defoliation was applied.
\item \code{phenol} a categorical ordered factor with 5 levels that
represent the phenological stages of the cotton plant in which
defoliation was applied.
\item \code{defol} a numeric factor with 5 levels that represents the
artificial level of defoliation (percent in leaf area removed with
scissors) applied for all leaves in the plant.
\item \code{rept} index for each experimenal unit in each treatment cell.
\item \code{rept} index for each experimental unit in each treatment
cell.
\item \code{bolls} the number of bolls produced (count variable)
evaluated at harvest.
}
}
\details{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment