From b2edb4e33983067c1bebd8fc569621046316972f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hannes=20M=C3=BChleisen?= <hannes@muehleisen.org> Date: Mon, 7 Apr 2014 13:12:34 +0200 Subject: [PATCH] plot update --- plot.R | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/plot.R b/plot.R index a2e49ad..6d1b5d9 100644 --- a/plot.R +++ b/plot.R @@ -3,7 +3,7 @@ library(ggthemes) library(scales) library(plyr) -setwd("~/Desktop/compare/") +#setwd("~/Desktop/compare/") textsize <- 16 theme <- theme_few(base_size = textsize) + @@ -14,8 +14,18 @@ theme(axis.text.x = element_text(angle = 90, hjust = 1), compare <- read.table("results.tsv",sep="\t",na.strings="") names(compare) <- c ("db","dbver","bmark","sf","phase","q","rep","time") +compare.old <- read.table("results-hot-oldformat.tsv",sep="\t",na.strings="") +names(compare.old) <- c ("db","sf","phase","q","time") + +compare.old <- compare.old[compare.old$phase=="hotruns",] +compare.old$rep <- 0 +compare.old$dbver <- "42" +compare.old$bmark <- "tpch" + +compare <- rbind(compare,compare.old) + # we have a 30 min time limit, so everything over that is a fail -compare[compare$time>1800,]$time <- NA +#compare[compare$time>1800,]$time <- NA levels(compare$db) <- c("Citusdata","MonetDB","PostgreSQL") compare$db <- ordered(compare$db,levels=c("PostgreSQL","Citusdata","MonetDB")) @@ -24,11 +34,11 @@ levels(compare$q) <- toupper(levels(compare$q)) tpcplot <- function(data,filename="out.pdf",sf=1,phase="hotruns",queries=levels(data$q),width=8,ylimit=100,main="",sub="") { pdata <- ddply(data[which(data$sf == as.character(sf) & data$phase==as.character(phase)),], c("db", "q"), summarise, avgtime = mean(time), - se = sd(time) / sqrt(length(time)) ) - pdata <- pdata[pdata$q %in% queries,] - if (nrow(pdata) < 1) stop("No data, dude.") + se = sd(time) / sqrt(length(time)) ) + pdata <- pdata[pdata$q %in% queries,] + if (nrow(pdata) < 1) {warning("No data, dude."); return(NA)} pdata$outlier <- pdata$avgtime > ylimit - pdata[pdata$outlier,]$se <- NA + if (nrow(pdata[pdata$outlier,]) > 0) pdata[pdata$outlier,]$se <- NA pdf(filename,width=width,height=6) dodge <- position_dodge(width=.8) print(ggplot(pdata,aes(x=q,y=avgtime,fill=db)) + @@ -41,18 +51,19 @@ tpcplot <- function(data,filename="out.pdf",sf=1,phase="hotruns",queries=levels( dev.off() } + qss <- c("Q03","Q05","Q06","Q10") # sf1 tpcplot(data=compare,filename="sf1-hot-subset.pdf",sf="1",phase="hotruns",queries=qss,ylimit=4,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)") tpcplot(data=compare,filename="sf1-hot-all.pdf",sf="1",phase="hotruns",ylimit=25,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)",width=20) -tpcplot(data=compare,filename="sf1-cold-subset.pdf",sf="1",phase="coldruns",queries=qss,ylimit=4,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)") +tpcplot(data=compare,filename="sf1-cold-subset.pdf",sf="1",phase="coldruns",queries=qss,ylimit=12,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)") tpcplot(data=compare,filename="sf1-cold-all.pdf",sf="1",phase="coldruns",ylimit=25,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)",width=20) # sf5 tpcplot(data=compare,filename="sf5-hot-subset.pdf",sf="5",phase="hotruns",queries=qss,ylimit=20,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)") tpcplot(data=compare,filename="sf5-hot-all.pdf",sf="5",phase="hotruns",ylimit=80,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)",width=20) -tpcplot(data=compare,filename="sf5-cold-subset.pdf",sf="5",phase="coldruns",queries=qss,ylimit=20,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)") +tpcplot(data=compare,filename="sf5-cold-subset.pdf",sf="5",phase="coldruns",queries=qss,ylimit=60,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)") tpcplot(data=compare,filename="sf5-cold-all.pdf",sf="5",phase="coldruns",ylimit=100,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)",width=20) # sf10 -- GitLab