diff --git a/scripts/03_S_distr.R b/scripts/03_S_distr.R new file mode 100644 index 0000000000000000000000000000000000000000..bc01b9a06974f26719b04cdace20381ba1e4a843 --- /dev/null +++ b/scripts/03_S_distr.R @@ -0,0 +1,239 @@ +#======================================================================= +# Gráfico de controle para bar_X e S. + +#----------------------------------------------------------------------- +# Definições da sessão. + +# Instalar o pacote IQCC. +# install.package("IQCC") +suppressMessages(library(IQCC)) + +library(latticeExtra) + +# Trellis graphical style. +mycol <- c("#E41A1C", "#377EB8", "#4DAF4A", + "#984EA3", "#FF7F00", "#FFFF33") +ps <- list(box.rectangle = list(col = 1, fill = c("gray70")), + box.umbrella = list(col = 1, lty = 1), + dot.symbol = list(col = 1, pch = 19), + dot.line = list(col = "gray50", lty = 3), + plot.symbol = list(col = 1, cex = 0.8), + plot.line = list(col = 1), + plot.polygon = list(col = "gray95"), + superpose.line = list(col = mycol, lty = 1), + superpose.symbol = list(col = mycol, pch = 1), + superpose.polygon = list(col = mycol), + strip.background = list(col = c("gray80", "gray50"))) +trellis.par.set(ps) + +library(reshape) + +#----------------------------------------------------------------------- +# Distribuição amostral de R e S. Qual é o melhor estimador? + +# Diferentes tamanhos de amostra. +n <- c(3, 5, 7, 10, 15, 25) + +# Para cada tamanho de amostra, são geradas 500 amostras e calculados R +# e S. +L <- lapply(n, + FUN = function(n) { + res <- replicate(500, { + c4 <- c4(n) + d2 <- d2(n) + x <- rnorm(n) + r <- diff(range(x)) + s <- sd(x) + c(r = r/d2, s = s/c4) + }) + res <- cbind(n = n, as.data.frame(t(res))) + return(res) + }) + +str(L) + +L <- do.call(rbind, L) + +library(reshape) + +M <- melt(data = L, id.vars = "n", + variable_name = "estim") +str(M) + +# Qual é o estimador mais preciso ou eficiente? +ecdfplot(~value | factor(n), groups = estim, data = M, + scales = "free", + auto.key = TRUE, + as.table = TRUE) + +str(M) + +# names(trellis.par.get()) +# trellis.par.get()$superpose.line$col + +with(M, tapply(value, list(estim, n), mean)) +with(M, tapply(value, list(estim, n), sd)) + +aggregate(M$value[subscripts], + M$estim[subscripts], + FUN = function(x) c(mean(x), sd(x))) + +a <- aggregate(cbind(y = M$value), + by = list(M$estim), + FUN = function(x) c(mean(x), sd(x))) +a + +c(t(apply(a[, -1], 1, FUN = function(x) { x[1] + c(-1, 0, 1) * x[2]}))) + +xtabs(~estim + n, M) + +densityplot(~value | factor(n), groups = estim, data = M, + # scales = "free", + auto.key = TRUE, as.table = TRUE, + plot.points = FALSE, ref = TRUE, + panel=function(x, subscripts, ...){ + panel.densityplot(x, subscripts = subscripts, ...) + a <- aggregate(cbind(M$value[subscripts]), + list(estim = M$estim[subscripts]), + FUN = function(x) c(mean(x), sd(x))) + m <- c(t(apply(a[, -1], 1, + FUN = function(x) { + x[1] + c(-1, 0, 1) * x[2] + }))) + k <- trellis.par.get( + "superpose.line")$col[1:nlevels(M$estim)] + panel.abline(v = m, col = k, lty = 2) + # panel.grid(v = 2) + }) + +bwplot(value ~ estim | factor(n), data = M, pch = "|", + as.table = TRUE, layout = c(NA, 1), + panel = function(x, y, subscripts, ...) { + panel.bwplot(x = x, y = y, subscripts = subscripts, ...) + m <- tapply(y, x, mean) + panel.points(x = 1:nlevels(x), y = m, pch = 19) + }) + layer(panel.abline(h = 1, lty = 2)) + +#----------------------------------------------------------------------- +# Gráfico de controle para n fixo. + +# x <- read.table("clipboard") +# dput(x[, 1]) + +browseURL(paste0("http://bcs.wiley.com/he-bcs/Books?action=", + "chapter&bcsId=7327&itemId=1118146816&chapterId=80371")) +# arquivo: ch06.xlxs, sheet: Exercise Data, coluna: Ex6-15ID (AB). + +x <- c(74.03, 74.002, 74.019, 73.992, 74.008, 73.995, 73.992, 74.001, + 74.011, 74.004, 73.988, 74.024, 74.021, 74.005, 74.002, 74.002, + 73.996, 73.993, 74.015, 74.009, 73.992, 74.007, 74.015, 73.989, + 74.014, 74.009, 73.994, 73.997, 73.985, 73.993, 73.995, 74.006, + 73.994, 74, 74.005, 73.985, 74.003, 73.993, 74.015, 73.988, + 74.008, 73.995, 74.009, 74.005, 74.004, 73.998, 74, 73.99, + 74.007, 73.995, 73.994, 73.998, 73.994, 73.995, 73.99, 74.004, + 74, 74.007, 74, 73.996, 73.983, 74.002, 73.998, 73.997, 74.012, + 74.006, 73.967, 73.994, 74, 73.984, 74.012, 74.014, 73.998, + 73.999, 74.007, 74, 73.984, 74.005, 73.998, 73.996, 73.994, + 74.012, 73.986, 74.005, 74.007, 74.006, 74.01, 74.018, 74.003, + 74, 73.984, 74.002, 74.003, 74.005, 73.997, 74, 74.01, 74.013, + 74.02, 74.003, 73.982, 74.001, 74.015, 74.005, 73.996, 74.004, + 73.999, 73.99, 74.006, 74.009, 74.01, 73.989, 73.99, 74.009, + 74.014, 74.015, 74.008, 73.993, 74, 74.01, 73.982, 73.984, + 73.995, 74.017, 74.013) +length(x) + +n <- rep(1:25, each = 5) + +library(qcc) + +da <- t(unstack(x = data.frame(i = n, x = x), form = x ~ i)) + +obj <- qcc(data = da, type = "xbar") +obj <- qcc(data = da, type = "S") + +#----------------------------------------------------------------------- +# Gráfico de controle para n variável. + +# n <- scan() +# dput(n) +n <- c(5, 3, 5, 5, 5, 4, 4, 5, 4, 5, 5, 5, 3, 5, 3, 5, 4, 5, 5, 3, 5, 5, + 5, 5, 5) + +# Cada amostra separada. +L <- do.call(rbind, + by(data = da, INDICES = da$i, + FUN = function(d) { + i <- d$i[1] + d[1:n[i], ] + })) +str(L) + +# As estimativas de média e desvio-padrão por amostra. +A <- aggregate(x ~ i, data = L, + FUN = function(x) { + c(m = mean(x), s = sd(x)) + }) +str(A) + +# Média das médias: bar_bar_X. +bbx <- sum(A$x[, "m"] * n)/sum(n) + +# Médias dos desvios-padrões: bar_S. +bs <- sqrt(sum((n - 1) * A$x[, "s"]^2)/c(sum(n - 1))) + +# As constantes que dependem do n. +A3 <- 3/(c4(n) * sqrt(n)) +B3 <- 1 - 3 * sqrt(1 - c4(n)^2)/c4(n) +B4 <- 1 + 3 * sqrt(1 - c4(n)^2)/c4(n) + +# Os limites para as duas medidas. +LC <- data.frame(i = 1:length(n), + LCm = bbx, + LICm = bbx - A3 * bs, + LSCm = bbx + A3 * bs, + LCs = bs, + LICs = pmax(0, B3 * bs), + LSCs = B4 * bs) + +# bar_X chart. +matplot(x = LC$i - 0.5, y = LC[, c("LCm", "LICm", "LSCm")], + type = "n", + xlab = "Número da amostra", + ylab = expression(bar(x))) +lines(LC$i, LC$LCm) +with(LC, segments(x0 = i - 0.5, x1 = i + 0.5, y0 = LICm, y1 = LICm)) +with(LC, segments(x0 = i - 0.5, x1 = i + 0.5, y0 = LSCm, y1 = LSCm)) +points(x = LC$i, y = A$x[, "m"], type = "o") + +# S chart. +matplot(x = LC$i - 0.5, + y = LC[, c("LCs", "LICs", "LSCs")], + type = "n", + xlab = "Número da amostra", + ylab = expression(s)) +lines(LC$i, LC$LCs) +with(LC, segments(x0 = i - 0.5, x1 = i + 0.5, y0 = LICs, y1 = LICs)) +with(LC, segments(x0 = i - 0.5, x1 = i + 0.5, y0 = LSCs, y1 = LSCs)) +points(x = LC$i, y = A$x[, "s"], type = "o") + +# DANGER: tem alguma coisa nesse código passo a passo, algum fator de +# escala, por exemplo, que faz com que esses resultados sejam +# ligeiramente diferentes do pacote qqc. + +#----------------------------------------------------------------------- + +# help(qcc, h = "html") +# qcc() +# help.search("rbind.fill") + +sam <- by(L, L$i, + FUN = function(d) { + as.data.frame(matrix(d$x, nrow = 1)) + }) + +db <- do.call(plyr::rbind.fill, sam) + +qcc(data = db, type = "xbar") +qcc(data = db, type = "S") + +#-----------------------------------------------------------------------