From b2edb4e33983067c1bebd8fc569621046316972f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hannes=20M=C3=BChleisen?= <hannes@muehleisen.org>
Date: Mon, 7 Apr 2014 13:12:34 +0200
Subject: [PATCH] plot update

---
 plot.R | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/plot.R b/plot.R
index a2e49ad..6d1b5d9 100644
--- a/plot.R
+++ b/plot.R
@@ -3,7 +3,7 @@ library(ggthemes)
 library(scales)
 library(plyr)
 
-setwd("~/Desktop/compare/")
+#setwd("~/Desktop/compare/")
 
 textsize <- 16
 theme <- theme_few(base_size = textsize) + 
@@ -14,8 +14,18 @@ theme(axis.text.x = element_text(angle = 90, hjust = 1),
 compare <- read.table("results.tsv",sep="\t",na.strings="")
 names(compare) <- c ("db","dbver","bmark","sf","phase","q","rep","time")
 
+compare.old <- read.table("results-hot-oldformat.tsv",sep="\t",na.strings="")  # older result file, read with the same separator/NA settings as results.tsv
+names(compare.old) <- c ("db","sf","phase","q","time")  # old layout has 5 columns: no dbver, bmark or rep
+
+compare.old <- compare.old[compare.old$phase=="hotruns",]  # keep only rows whose phase is "hotruns"
+compare.old$rep <- 0  # constant filler for the rep column the old layout lacks
+compare.old$dbver <- "42"  # constant filler for dbver -- NOTE(review): meaning of "42" is not evident here, confirm
+compare.old$bmark <- "tpch"  # constant filler for bmark; every old row is labelled "tpch"
+
+compare <- rbind(compare,compare.old)  # append old rows to the main table; data-frame rbind matches columns by name
+
 # we have a 30 min time limit, so everything over that is a fail
-compare[compare$time>1800,]$time <- NA
+#compare[compare$time>1800,]$time <- NA
 
 levels(compare$db) <- c("Citusdata","MonetDB","PostgreSQL")
 compare$db <- ordered(compare$db,levels=c("PostgreSQL","Citusdata","MonetDB"))
@@ -24,11 +34,11 @@ levels(compare$q) <- toupper(levels(compare$q))
 tpcplot <- function(data,filename="out.pdf",sf=1,phase="hotruns",queries=levels(data$q),width=8,ylimit=100,main="",sub="") {
   pdata <- ddply(data[which(data$sf == as.character(sf) & data$phase==as.character(phase)),], 
                  c("db", "q"), summarise, avgtime = mean(time),
-                 se = sd(time) / sqrt(length(time)) )
-  pdata <- pdata[pdata$q %in% queries,]
-  if (nrow(pdata) < 1) stop("No data, dude.")
+                 se = sd(time) / sqrt(length(time)) )  
+  pdata <- pdata[pdata$q %in% queries,]  
+  if (nrow(pdata) < 1) {warning("No data, dude."); return(NA)}
   pdata$outlier <- pdata$avgtime > ylimit
-  pdata[pdata$outlier,]$se <- NA
+  pdata$se[which(pdata$outlier)] <- NA  # blank the error bar for outliers; which() drops NA and yields an empty index when there are none, so no row-count guard is needed
   pdf(filename,width=width,height=6)
   dodge <- position_dodge(width=.8)
   print(ggplot(pdata,aes(x=q,y=avgtime,fill=db)) + 
@@ -41,18 +51,19 @@ tpcplot <- function(data,filename="out.pdf",sf=1,phase="hotruns",queries=levels(
   dev.off()
 }
 
+
 qss <- c("Q03","Q05","Q06","Q10")
 
 # sf1
 tpcplot(data=compare,filename="sf1-hot-subset.pdf",sf="1",phase="hotruns",queries=qss,ylimit=4,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)")
 tpcplot(data=compare,filename="sf1-hot-all.pdf",sf="1",phase="hotruns",ylimit=25,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)",width=20)
-tpcplot(data=compare,filename="sf1-cold-subset.pdf",sf="1",phase="coldruns",queries=qss,ylimit=4,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)")
+tpcplot(data=compare,filename="sf1-cold-subset.pdf",sf="1",phase="coldruns",queries=qss,ylimit=12,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)")
 tpcplot(data=compare,filename="sf1-cold-all.pdf",sf="1",phase="coldruns",ylimit=25,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)",width=20)
 
 # sf5
 tpcplot(data=compare,filename="sf5-hot-subset.pdf",sf="5",phase="hotruns",queries=qss,ylimit=20,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)")
 tpcplot(data=compare,filename="sf5-hot-all.pdf",sf="5",phase="hotruns",ylimit=80,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)",width=20)
-tpcplot(data=compare,filename="sf5-cold-subset.pdf",sf="5",phase="coldruns",queries=qss,ylimit=20,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)")
+tpcplot(data=compare,filename="sf5-cold-subset.pdf",sf="5",phase="coldruns",queries=qss,ylimit=60,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)")
 tpcplot(data=compare,filename="sf5-cold-all.pdf",sf="5",phase="coldruns",ylimit=100,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)",width=20)
 
 # sf10
-- 
GitLab