diff --git a/ehannes.sh b/ehannes.sh index fdb0aeef8a06a33193c505964ac10e49d4ba22d6..682f40f4aaaa72f21a4cf226b9340cd40dfee0b0 100755 --- a/ehannes.sh +++ b/ehannes.sh @@ -1,5 +1,72 @@ + #!/bin/bash -# 2014-04-04 Hannes Muehleisen <hannes@cwi.nl> +#set -x + +# command line parameter parsing fun +usage() { echo "Usage: $0 -s <scale factors> -d <databases to test> -p <directory prefix>" 1>&2; exit 1; } + +while getopts ":s:d:p:" o; do + case "${o}" in + s) + s=${OPTARG} + ;; + d) + d=${OPTARG} + ;; + p) + p=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${p}" ] ; then + echo "-p is required. Example: -p /tmp/ehannes/" 1>&2; + usage +fi + +mkdir -p $p +if [ ! -d "${p}" ] ; then + echo "Directory $p does not exist and cannot be created." 1>&2; + usage +fi + +if [ -z "${s}" ] ; then + echo "-s is required. Example: -s \"1 3 10\"" 1>&2; + usage +fi +for SF in $s +do + if [ $SF -lt 1 ] ; then + echo "Invalid value for scale factor: $SF" 1>&2; + exit 1 + fi +done + +if [ -z "${d}" ] ; then + echo "-d is required. Example: -d \"monetdb postgres\"" 1>&2; + usage +fi + +for DB in $d +do + echo $DB | grep "monetdb\|postgres\|mariadb\|citusdb" > /dev/null + if [ ! $? -eq 0 ] ; then + echo "Invalid value for database: $DB" 1>&2; + exit 1 + fi +done + +echo "TPC-H DB comparision script, <hannes@cwi.nl> 2014" +echo +echo "Testing databases $d" +echo "Testing scale factors $s" +echo "Using prefix directory $p" + + # db versions # PostgreSQL @@ -13,7 +80,7 @@ MAVER=10.0.10 PBVER=2.5.0 PBCVER=0.15 -DIR=/export/scratch2/hannes/compare/ +DIR=$p SDIR=$DIR/.sources IDIR=$DIR/.install @@ -28,155 +95,13 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PBINS/lib:$PBCINS/lib mkdir -p $SDIR mkdir -p $IDIR -DROPCACHE="echo 3 | sudo /usr/bin/tee /proc/sys/vm/drop_caches" - # clean up source dir first rm -rf $SDIR/* # remove this thing to force a rebuild of the citusdata extension, it might change quickly rm $PINS/lib/cstore_fdw.so -# MonetDB installer -if [ ! -f $MINS/bin/mserver5 ] ; then - rm -rf $MINS - MURL=http://www.monetdb.org/downloads/sources/Latest/MonetDB-$MVER.tar.bz2 - wget $MURL -P $SDIR --no-check-certificate - tar xvf $SDIR/MonetDB-*.tar.* -C $SDIR - MSRC=$SDIR/MonetDB-$MVER/ - cd $MSRC - ./configure --prefix=$MINS --enable-rubygem=no --enable-python3=no --enable-python2=no --enable-perl=no --enable-geos=no --enable-python=no --enable-geom=no --enable-fits=no --enable-jaql=no --enable-gsl=no --enable-odbc=no --enable-jdbc=no --enable-merocontrol=no - make -j install - cd $DIR - rm -rf $MSRC $SDIR/MonetDB-*.tar.* -fi - -# PostgreSQL installer -if [ ! -f $PINS/bin/postgres ] ; then - rm -rf $PINS - PGURL=http://ftp.postgresql.org/pub/source/v$PGVER/postgresql-$PGVER.tar.gz - wget $PGURL -P $SDIR - tar xvf $SDIR/postgresql-*.tar.* -C $SDIR - PSRC=$SDIR/postgresql-$PGVER/ - cd $PSRC - ./configure --prefix=$PINS - make - make install - cd $DIR - rm -rf $PSRC $SDIR/postgresql-*.tar.* -fi - -# Citusdata installer -if [ ! -f $PINS/lib/cstore_fdw.so ] ; then - git clone https://github.com/citusdata/cstore_fdw/ $SDIR/cstore_fdw - if [ ! -f $PBINS/bin/protoc ] || [ ! 
-f $PBCINS/bin/protoc-c ] ; then - wget https://protobuf.googlecode.com/files/protobuf-$PBVER.tar.gz -P $SDIR - wget https://protobuf-c.googlecode.com/files/protobuf-c-$PBCVER.tar.gz -P $SDIR - tar xvf $SDIR/protobuf-$PBVER.tar.gz -C $SDIR - tar xvf $SDIR/protobuf-c-$PBCVER.tar.gz -C $SDIR - - # protobuf and protbuf-c are dependencies of citusdb-store - PBSRC=$SDIR/protobuf-$PBVER/ - cd $PBSRC - ./configure --prefix=$PBINS - make -j install - - PBCSRC=$SDIR/protobuf-c-$PBCVER/ - cd $PBCSRC - ./configure --prefix=$PBCINS CXXFLAGS=-I$IDIR/protobuf-$PBVER/include LDFLAGS=-L$IDIR/protobuf-$PBVER/lib PATH=$PATH:$PBINS/bin/ - make -j install - fi - # cstore is a pgplugin - CSRC=$SDIR/cstore_fdw - cd $CSRC - # some funny include path messing - PATH=$PATH:$PINS/bin/:$PBCINS/bin/ CPATH=$CPATH:$PBCINS/include LIBRARY_PATH=$LIBRARY_PATH:$PBCINS/lib make -j install - cd $DIR - rm -rf $CSRC $PBCSRC $PBSRC $SDIR/protobuf-*.tar.* -fi - -# MariaDB installer -if [ ! -f $MAINS/bin/mysqld ] ; then - rm -rf $MAINS - MAURL=http://mariadb.mirror.triple-it.nl//mariadb-$MAVER/kvm-tarbake-jaunty-x86/mariadb-$MAVER.tar.gz - wget $MAURL -P $SDIR - tar xvf $SDIR/mariadb-*.tar.* -C $SDIR - MASRC=$SDIR/mariadb-$MAVER/ - cd $MASRC - cmake -DCMAKE_INSTALL_PREFIX:PATH=$MAINS . - make - make install - cd $DIR - rm -rf $MASRC $SDIR/mariadb-*.tar.* -fi - -# TPC-H dbgen installer -if [ ! -f $IDIR/dbgen/dbgen ] ; then - rm -rf $IDIR/dbgen/ - wget http://www.tpc.org/tpch/spec/tpch_2_16_1.zip -P $SDIR - unzip $SDIR/tpch_*.zip -d $SDIR - cd $SDIR/tpch_2_16_1/dbgen - sed -e 's/DATABASE\s*=/DATABASE=DB2/' -e 's/MACHINE\s*=/MACHINE=LINUX/' -e 's/WORKLOAD\s*=/WORKLOAD=TPCH/' -e 's/CC\s*=/CC=gcc/' makefile.suite > Makefile - make - mkdir $IDIR/dbgen/ - cp dbgen dists.dss $IDIR/dbgen/ - rm -rf $SDIR/tpch_* -fi - -# in case something was left -rm -rf $SDIR/* - - -# some sys setup for PostgreSQL according to Dr. Kyzirakos (TM) - -### RAM -## 4 GB of RAM -#shared_buffers = 3GB -#effective_cache_size = 3GB -#maintenance_work_mem = 1GB -#work_mem = 2GB -## 8 GB of RAM -#shared_buffers = 5GB -#effective_cache_size = 6GB -#maintenance_work_mem = 2GB -#work_mem = 5GB -## 16 GB of RAM -#shared_buffers = 10GB -#effective_cache_size = 14GB -#maintenance_work_mem = 4GB -#work_mem = 10GB -## 24 GB of RAM -#shared_buffers = 16GB -#effective_cache_size = 22GB -#maintenance_work_mem = 6GB -#work_mem = 15GB -## 48 GB of RAM -#shared_buffers = 32GB -#effective_cache_size = 46GB -#maintenance_work_mem = 8GB -#work_mem = 30GB - -## 4 GB of RAM -#kernel.shmmax = 3758096384 -#kernel.shmall = 3758096384 -#kernel.shmmni = 4096 -## 8 GB of RAM -#kernel.shmmax = 5905580032 -#kernel.shmall = 5905580032 -#kernel.shmmni = 4096 -## 16 GB of RAM -#kernel.shmmax = 11274289152 -#kernel.shmall = 11274289152 -#kernel.shmmni = 4096 -## 24 GB of RAM -#kernel.shmmax = 17716740096 -#kernel.shmall = 17716740096 -#kernel.shmmni = 4096 -## 48 GB of RAM -#kernel.shmmax = 35433480192 -#kernel.shmall = 35433480192 -#kernel.shmmni = 4224 -## 64 GB of RAM - +# some setup for PostgreSQL according to Dr. 
Kyzirakos (TM) # here: 16 GB pg_shared_buffers=10GB pg_effective_cache_size=6GB @@ -186,8 +111,6 @@ pg_work_mem=5GB #sudo bash -c "sysctl -w kernel.shmall=11274289152" #sudo bash -c "sysctl -w kernel.shmmni=4096" -# okay, run tpc-h - DDIR=$DIR/.data SCDIR=$DIR/scripts QYDIR=$DIR/queries @@ -208,12 +131,63 @@ BMARK="tpch" TIMINGCMD="/usr/bin/time -o $DIR/.time -f %e " TIMEOUTCMD="timeout -k 35m 30m " -for SF in 1 #5 10 +# http://mywiki.wooledge.org/BashFAQ/050 +dropCache() { + echo 3 | sudo /usr/bin/tee /proc/sys/vm/drop_caches +} + +runQuery() { # runQuery PHASE REP DROPC + for REP in {1..$2} + do + for QFILE in $QYDIR/q??.sql + do + if [ "$3" -gt 0 ] ; then + dropCache + # startup db + eval "$SERVERCMD$DBFARM > /dev/null &" + sleep 5 + fi + + q=${QFILE%.sql} + qn=`basename $q` + # run query + eval "$TIMEOUTCMD$TIMINGCMD$CLIENTCMD$QFILE" > $QRDIR/$DB-SF$SF-coldrun$coldrun-$qn.out + QTIME=`cat $DIR/.time` + echo -e "$LOGPREFIX\t$1\t$qn\t$REP\t$QTIME" >> $RESFL + + if [ "$3" -gt 0 ] ; then + # shutdown db + shutdown + sleep 5 + fi + + done + done +} + +for SF in $s do # check if we have data SFDDIR=$DDIR/sf-$SF/ # if not, generate if [ ! -f $SFDDIR/lineitem.tbl ] ; then + # TPC-H dbgen installer + if [ ! -f $IDIR/dbgen/dbgen ] ; then + rm -rf $IDIR/dbgen/ + wget http://www.tpc.org/tpch/spec/tpch_2_16_1.zip -P $SDIR + unzip $SDIR/tpch_*.zip -d $SDIR + cd $SDIR/tpch_2_16_1/dbgen + sed -e 's/DATABASE\s*=/DATABASE=DB2/' -e 's/MACHINE\s*=/MACHINE=LINUX/' -e 's/WORKLOAD\s*=/WORKLOAD=TPCH/' -e 's/CC\s*=/CC=gcc/' makefile.suite > Makefile + make + mkdir $IDIR/dbgen/ + cp dbgen dists.dss $IDIR/dbgen/ + rm -rf $SDIR/tpch_* + fi + if [ ! -f $IDIR/dbgen/dbgen ] ; then + echo "Failed to install TPCH dbgen" + exit -1 + fi + cd $IDIR/dbgen/ ./dbgen -vf -s $SF mkdir -p $SFDDIR @@ -222,20 +196,68 @@ do mv *.tbl $SFDDIR fi cd $DIR - for DB in postgres citusdata postgres monetdb # mariadb + for DB in $d # postgres citusdata postgres monetdb do DBNAME=$DB-sf$SF DBFARM=$FARM/$DBNAME/ #rm -rf $DBFARM/* if [ "$DB" == "monetdb" ]; then + # MonetDB installer + if [ ! -f $MINS/bin/mserver5 ] ; then + rm -rf $MINS + MURL=http://www.monetdb.org/downloads/sources/Latest/MonetDB-$MVER.tar.bz2 + wget $MURL -P $SDIR --no-check-certificate + tar xvf $SDIR/MonetDB-*.tar.* -C $SDIR + MSRC=$SDIR/MonetDB-$MVER/ + cd $MSRC + ./configure --prefix=$MINS --enable-rubygem=no --enable-python3=no --enable-python2=no --enable-perl=no --enable-geos=no --enable-python=no --enable-geom=no --enable-fits=no --enable-jaql=no --enable-gsl=no --enable-odbc=no --enable-jdbc=no --enable-merocontrol=no + make -j install + cd $DIR + rm -rf $MSRC $SDIR/MonetDB-*.tar.* + fi + if [ ! -f $MINS/bin/mserver5 ] ; then + echo "Failed to install MonetDB" + exit -1 + fi + SERVERCMD="$MINS/bin/mserver5 --set mapi_port=$PORT --daemon=yes --dbpath=" - CLIENTCMD="$MINS/bin/mclient -p $PORT " + CLIENTCMD="$MINS/bin/mclient -fcsv -p $PORT " INITFCMD="echo " CREATEDBCMD="echo createdb" + shutdown() { + kill $! + sleep 10 + kill -9 $! + } DBVER=$MVER fi + if [ "$DB" == "postgres" ] || [ "$DB" == "citusdata" ]; then + # PostgreSQL installer + if [ ! -f $PINS/bin/postgres ] ; then + rm -rf $PINS + PGURL=http://ftp.postgresql.org/pub/source/v$PGVER/postgresql-$PGVER.tar.gz + wget $PGURL -P $SDIR + tar xvf $SDIR/postgresql-*.tar.* -C $SDIR + PSRC=$SDIR/postgresql-$PGVER/ + cd $PSRC + ./configure --prefix=$PINS + make + make install + cd $DIR + rm -rf $PSRC $SDIR/postgresql-*.tar.* + fi + if [ ! 
-f $PINS/bin/postgres ] ; then + echo "Failed to install PostgreSQL" + exit -1 + fi + + # only preload citusdata lib if it exists + PGPRELOAD="" + if [ -f $PINS/lib/cstore_fdw.so ] ; then + PGPRELOAD="-c shared_preload_libraries=cstore_fdw" + fi SERVERCMD="$PINS/bin/postgres -p $PORT \ -c autovacuum=off \ -c random_page_cost=3.5 \ @@ -248,119 +270,191 @@ do -c wal_buffers=32MB \ -c checkpoint_segments=64 \ -c max_connections=10 \ - -c shared_preload_libraries=cstore_fdw \ + $PGPRELOAD \ -D " CLIENTCMD="$PINS/bin/psql -p $PORT tpch -t -A -F , -f " INITFCMD="$PINS/bin/initdb -D " CREATEDBCMD="$PINS/bin/createdb -p $PORT tpch" + shutdown() { + kill -INT $! + } DBVER=$PGVER fi + # TODO: does postgres run with a missing shared_preload? if [ "$DB" == "citusdata" ]; then + # Citusdata installer + if [ ! -f $PINS/lib/cstore_fdw.so ] ; then + git clone https://github.com/citusdata/cstore_fdw/ $SDIR/cstore_fdw + if [ ! -f $PBINS/bin/protoc ] || [ ! -f $PBCINS/bin/protoc-c ] ; then + wget https://protobuf.googlecode.com/files/protobuf-$PBVER.tar.gz -P $SDIR + wget https://protobuf-c.googlecode.com/files/protobuf-c-$PBCVER.tar.gz -P $SDIR + tar xvf $SDIR/protobuf-$PBVER.tar.gz -C $SDIR + tar xvf $SDIR/protobuf-c-$PBCVER.tar.gz -C $SDIR + + # protobuf and protbuf-c are dependencies of citusdb-store + PBSRC=$SDIR/protobuf-$PBVER/ + cd $PBSRC + ./configure --prefix=$PBINS + make -j install + + PBCSRC=$SDIR/protobuf-c-$PBCVER/ + cd $PBCSRC + ./configure --prefix=$PBCINS CXXFLAGS=-I$IDIR/protobuf-$PBVER/include LDFLAGS=-L$IDIR/protobuf-$PBVER/lib PATH=$PATH:$PBINS/bin/ + make -j install + fi + # cstore is a pgplugin + CSRC=$SDIR/cstore_fdw + cd $CSRC + # some funny include path messing + PATH=$PATH:$PINS/bin/:$PBCINS/bin/ CPATH=$CPATH:$PBCINS/include LIBRARY_PATH=$LIBRARY_PATH:$PBCINS/lib make -j install + cd $DIR + rm -rf $CSRC $PBCSRC $PBSRC $SDIR/protobuf-*.tar.* + fi + if [ ! -f $PINS/lib/cstore_fdw.so ] ; then + echo "Failed to install CitusDB" + exit -1 + fi DBVER=snapshot-`date +"%Y-%m-%d"` fi if [ "$DB" == "mariadb" ] ; then - SERVERCMD="$MAINS/bin/mysqld --basedir=$MAINS -P $PORT --datadir=" - CLIENTCMD="$MAINS/bin/mysql -P $PORT -N tpcd -B <" - INITFCMD="" - CREATEDBCMD="" + # MariaDB installer + if [ ! -f $MAINS/bin/mysqld ] ; then + rm -rf $MAINS + MAURL=http://mariadb.mirror.triple-it.nl//mariadb-$MAVER/kvm-tarbake-jaunty-x86/mariadb-$MAVER.tar.gz + wget $MAURL -P $SDIR + tar xvf $SDIR/mariadb-*.tar.* -C $SDIR + MASRC=$SDIR/mariadb-$MAVER/ + cd $MASRC + cmake -DCMAKE_INSTALL_PREFIX:PATH=$MAINS . + make + make install + cd $DIR + rm -rf $MASRC $SDIR/mariadb-*.tar.* + fi + + if [ ! -f $MAINS/bin/mysqld ] ; then + echo "Failed to install MariaDB" + exit -1 + fi + + DBSOCK=$DIR/.mariadb.socket + SERVERCMD="$MAINS/bin/mysqld \ + --lower_case_table_names=1 \ + --basedir=$MAINS -P $PORT --pid-file=$DIR/.mariadb.pid --socket=$DBSOCK --datadir=" + CLIENTCMD="$MAINS/bin/mysql -u root --socket=$DBSOCK tpch < " + INITFCMD="$MAINS/scripts/mysql_install_db --basedir=$MAINS --datadir=" + CREATEDBCMD="$MAINS/bin/mysqladmin -u root --socket=$DBSOCK create tpch" + shutdown() { + $MAINS/bin/mysqladmin -u root --socket=$DBSOCK shutdown + } DBVER=$MAVER fi + LOGPREFIX="$DB\t$DBVER\t$BMARK\t$SF" if [ ! 
-d $DBFARM ] ; then # clear caches (fair loading) - echo 3 | sudo /usr/bin/tee /proc/sys/vm/drop_caches + dropCache mkdir -p $DBFARM # initialize db directory - $INITFCMD$DBFARM + eval "$INITFCMD$DBFARM" # start db server - $SERVERCMD$DBFARM > /dev/null & + eval "$SERVERCMD$DBFARM > /dev/null &" sleep 5 # create db (if applicable) - $CREATEDBCMD + eval "$CREATEDBCMD" # create schema sed -e "s|DIR|$DBFARM|" $SCDIR/$DB.schema.sql > $DIR/.$DB.schema.sql.local - $CLIENTCMD $DIR/.$DB.schema.sql.local > /dev/null + eval "$CLIENTCMD$DIR/.$DB.schema.sql.local" > /dev/null # load data sed -e "s|DIR|$SFDDIR|" $SCDIR/$DB.load.sql > $DIR/.$DB.load.sql.local - $TIMINGCMD $CLIENTCMD $DIR/.$DB.load.sql.local > /dev/null + eval "$TIMINGCMD$CLIENTCMD$DIR/.$DB.load.sql.local" > /dev/null LDTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\tload\t\t\t$LDTIME" >> $RESFL + echo -e "$LOGPREFIX\tload\t\t\t$LDTIME" >> $RESFL # constraints - $TIMINGCMD $CLIENTCMD $SCDIR/$DB.constraints.sql > /dev/null + eval "$TIMINGCMD$CLIENTCMD$SCDIR/$DB.constraints.sql" > /dev/null CTTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\tconstraints\t\t\t$CTTIME" >> $RESFL + echo -e "$LOGPREFIX\tconstraints\t\t\t$CTTIME" >> $RESFL # analyze/vacuum - $TIMINGCMD $CLIENTCMD $SCDIR/$DB.analyze.sql > /dev/null + eval "$TIMINGCMD$CLIENTCMD$SCDIR/$DB.analyze.sql" > /dev/null AZTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\tanalyze\t\t\t$AZTIME" >> $RESFL + echo -e "$LOGPREFIX\tanalyze\t\t\t$AZTIME" >> $RESFL - # aand restart - kill `jobs -p` - sleep 10 + shutdown fi - # we start with cold runs - # clear caches (fair loading) - for REP in {1..5} - do - for i in $QYDIR/q??.sql - do - echo 3 | sudo /usr/bin/tee /proc/sys/vm/drop_caches - $SERVERCMD$DBFARM > /dev/null & - sleep 5 - q=${i%.sql} - qn=`basename $q` - $TIMEOUTCMD $TIMINGCMD $CLIENTCMD $i > $QRDIR/$DB-SF$SF-coldrun$coldrun-q$qn.out - QTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\tcoldruns\t$qn\t$REP\t$QTIME" >> $RESFL - kill `jobs -p` - sleep 10 - kill -9 `jobs -p` - sleep 10 - done - - done - # warmup... - $SERVERCMD$DBFARM > /dev/null & + runQuery "coldruns" 5 1 + + eval "$SERVERCMD$DBFARM > /dev/null &" sleep 5 - for REP in {1..2} - do - for i in $QYDIR/q??.sql - do - q=${i%.sql} - qn=`basename $q` - $TIMEOUTCMD $TIMINGCMD $CLIENTCMD $i > $QRDIR/$DB-SF$SF-warmup$warmup-q$qn.out - QTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\twarmup\t$qn\t$REP\t$QTIME" >> $RESFL - done - done - # hot runs! 
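Review note on the refactored query driver: the new `runQuery PHASE REP DROPC` helper introduced above replaces the three copy-pasted cold/warmup/hot loops being deleted here, but its header `for REP in {1..$2}` will not iterate as intended, since bash performs brace expansion before parameter expansion the loop body runs exactly once with `REP` bound to the unexpanded text (e.g. `{1..5}`), and the result files are still named with a hard-coded `coldrun` label and the never-set `$coldrun` variable. A minimal corrected sketch, assuming the same globals as the script (`$QYDIR`, `$QRDIR`, `$DIR`, `$DB`, `$SF`, `$LOGPREFIX`, `$RESFL`, `$SERVERCMD`, `$CLIENTCMD`, `$TIMEOUTCMD`, `$TIMINGCMD`, `$DBFARM`) and the per-database `shutdown()` function defined further down:

runQuery() { # runQuery PHASE REPS DROPC
    local PHASE=$1 REPS=$2 DROPC=$3 REP QFILE qn
    for (( REP = 1; REP <= REPS; REP++ ))
    do
        for QFILE in $QYDIR/q??.sql
        do
            if [ "$DROPC" -gt 0 ] ; then
                # cold run: flush the page cache and start a fresh server for each query
                dropCache
                eval "$SERVERCMD$DBFARM > /dev/null &"
                sleep 5
            fi

            qn=$(basename "${QFILE%.sql}")
            # time the query; label the output file with the actual phase and repetition
            eval "$TIMEOUTCMD$TIMINGCMD$CLIENTCMD$QFILE" > $QRDIR/$DB-SF$SF-$PHASE$REP-$qn.out
            QTIME=$(cat $DIR/.time)
            echo -e "$LOGPREFIX\t$PHASE\t$qn\t$REP\t$QTIME" >> $RESFL

            if [ "$DROPC" -gt 0 ] ; then
                # cold run: stop the server again before the next query
                shutdown
                sleep 5
            fi
        done
    done
}

The C-style `for (( ... ))` loop (or `seq`) is the usual way to iterate a variable number of times in bash; everything else follows the patch's own structure.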
- for REP in {1..5} - do - for i in $QYDIR/q??.sql - do - q=${i%.sql} - qn=`basename $q` - $TIMEOUTCMD $TIMINGCMD $CLIENTCMD $i > $QRDIR/$DB-SF$SF-hotrun$hotrun-q$qn.out - QTIME=`cat $DIR/.time` - echo -e "$DB\t$DBVER\t$BMARK\t$SF\thotruns\t$qn\t$REP\t$QTIME" >> $RESFL - done - done - kill `jobs -p` - sleep 10 + runQuery "warmup" 2 0 + runQuery "hotruns" 5 0 + + shutdown + sleep 5 done - done rm $DIR/.*.sql.local rm $DIR/.time +rm -rf $SDIR/* + + + +### RAM +## 4 GB of RAM +#shared_buffers = 3GB +#effective_cache_size = 3GB +#maintenance_work_mem = 1GB +#work_mem = 2GB +## 8 GB of RAM +#shared_buffers = 5GB +#effective_cache_size = 6GB +#maintenance_work_mem = 2GB +#work_mem = 5GB +## 16 GB of RAM +#shared_buffers = 10GB +#effective_cache_size = 14GB +#maintenance_work_mem = 4GB +#work_mem = 10GB +## 24 GB of RAM +#shared_buffers = 16GB +#effective_cache_size = 22GB +#maintenance_work_mem = 6GB +#work_mem = 15GB +## 48 GB of RAM +#shared_buffers = 32GB +#effective_cache_size = 46GB +#maintenance_work_mem = 8GB +#work_mem = 30GB + +## 4 GB of RAM +#kernel.shmmax = 3758096384 +#kernel.shmall = 3758096384 +#kernel.shmmni = 4096 +## 8 GB of RAM +#kernel.shmmax = 5905580032 +#kernel.shmall = 5905580032 +#kernel.shmmni = 4096 +## 16 GB of RAM +#kernel.shmmax = 11274289152 +#kernel.shmall = 11274289152 +#kernel.shmmni = 4096 +## 24 GB of RAM +#kernel.shmmax = 17716740096 +#kernel.shmall = 17716740096 +#kernel.shmmni = 4096 +## 48 GB of RAM +#kernel.shmmax = 35433480192 +#kernel.shmall = 35433480192 +#kernel.shmmni = 4224 +## 64 GB of RAM diff --git a/plot.R b/plot.R new file mode 100644 index 0000000000000000000000000000000000000000..7fdc25245adaeea76798ff3a09a7f8823c5cb13c --- /dev/null +++ b/plot.R @@ -0,0 +1,74 @@ +library(ggplot2) +library(ggthemes) +library(scales) +library(plyr) + +#setwd("~/Desktop/compare/") +setwd("~/git/monetdb-postgres-compare/results/results-2014-04-07") + +textsize <- 16 +theme <- theme_few(base_size = textsize) + +theme(axis.text.x = element_text(angle = 90, hjust = 1), + legend.title=element_blank(), + legend.position=c(0.85,0.08)) + +compare <- read.table("results.tsv",sep="\t",na.strings="") +names(compare) <- c ("db","dbver","bmark","sf","phase","q","rep","time") + +#compare.old <- read.table("results-hot-oldformat.tsv",sep="\t",na.strings="") +#names(compare.old) <- c ("db","sf","phase","q","time") + +#compare.old <- compare.old[compare.old$phase=="hotruns",] +#compare.old$rep <- 0 +#compare.old$dbver <- "42" +#compare.old$bmark <- "tpch" + +#compare <- rbind(compare,compare.old) + +# we have a 30 min time limit, so everything over that is a fail +#compare[compare$time>1800,]$time <- NA + +levels(compare$db) <- c("Citusdata","MonetDB","PostgreSQL") +compare$db <- ordered(compare$db,levels=c("PostgreSQL","Citusdata","MonetDB")) +levels(compare$q) <- toupper(levels(compare$q)) + +tpcplot <- function(data,filename="out.pdf",sf=1,phase="hotruns",queries=levels(data$q),width=8,ylimit=100,main="",sub="") { + pdata <- ddply(data[which(data$sf == as.character(sf) & data$phase==as.character(phase)),], + c("db", "q"), summarise, avgtime = mean(time), + se = sd(time) / sqrt(length(time)) ) + pdata <- pdata[pdata$q %in% queries,] + if (nrow(pdata) < 1) {warning("No data, dude."); return(NA)} + pdata$outlier <- pdata$avgtime > ylimit + if (nrow(pdata[pdata$outlier,]) > 0) pdata[pdata$outlier,]$se <- NA + pdf(filename,width=width,height=6) + dodge <- position_dodge(width=.8) + print(ggplot(pdata,aes(x=q,y=avgtime,fill=db)) + + geom_bar(width=.65,position = 
dodge,stat="identity") + scale_y_continuous(limits = c(0, ylimit),oob=squish) + + geom_errorbar(aes(ymin=avgtime-se, ymax=avgtime+se), width=0.07,position=dodge) + + ggtitle(bquote(atop(.(main), atop(.(sub), "")))) + xlab("") + ylab("Duration (seconds)") + + scale_fill_manual(values = c("PostgreSQL" = "#2f7ed8", "Citusdata" = "#AA4643","MonetDB" = "#568203")) + + theme_few(base_size = textsize) + theme(legend.position="bottom", legend.title=element_blank(), panel.border = element_blank(),axis.line = element_line(colour = "black")) + + geom_text(aes(label=ifelse(outlier, paste0("^ ",round(avgtime),"s"), ""), hjust=.5,vjust=-.2), position = dodge)) + dev.off() +} + + +qss <- c("Q03","Q05","Q06","Q10") + +# sf1 +tpcplot(data=compare,filename="sf1-hot-subset.pdf",sf="1",phase="hotruns",queries=qss,ylimit=4,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)") +tpcplot(data=compare,filename="sf1-hot-all.pdf",sf="1",phase="hotruns",ylimit=25,main="Query Speed (Hot)",sub="TPC-H SF1 (1.1 GB)",width=20) +tpcplot(data=compare,filename="sf1-cold-subset.pdf",sf="1",phase="coldruns",queries=qss,ylimit=12,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)") +tpcplot(data=compare,filename="sf1-cold-all.pdf",sf="1",phase="coldruns",ylimit=25,main="Query Speed (Cold)",sub="TPC-H SF1 (1.1 GB)",width=20) + +# sf5 +tpcplot(data=compare,filename="sf5-hot-subset.pdf",sf="5",phase="hotruns",queries=qss,ylimit=20,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)") +tpcplot(data=compare,filename="sf5-hot-all.pdf",sf="5",phase="hotruns",ylimit=80,main="Query Speed (Hot)",sub="TPC-H SF5 (5.2 GB)",width=20) +tpcplot(data=compare,filename="sf5-cold-subset.pdf",sf="5",phase="coldruns",queries=qss,ylimit=60,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)") +tpcplot(data=compare,filename="sf5-cold-all.pdf",sf="5",phase="coldruns",ylimit=100,main="Query Speed (Cold)",sub="TPC-H SF5 (5.2 GB)",width=20) + +# sf10 +tpcplot(data=compare,filename="sf10-hot-subset.pdf",sf="10",phase="hotruns",queries=qss,ylimit=40,main="Query Speed (Hot)",sub="TPC-H SF10 (11 GB)") +tpcplot(data=compare,filename="sf10-hot-all.pdf",sf="10",phase="hotruns",ylimit=100,main="Query Speed (Hot)",sub="TPC-H SF10 (11 GB)",width=20) +tpcplot(data=compare,filename="sf10-cold-subset.pdf",sf="10",phase="coldruns",queries=qss,ylimit=70,main="Query Speed (Cold)",sub="TPC-H SF10 (11 GB)") +tpcplot(data=compare,filename="sf10-cold-all.pdf",sf="10",phase="coldruns",ylimit=100,main="Query Speed (Cold)",sub="TPC-H SF10 (11 GB)",width=20) diff --git a/queries/q17.sql.disabled b/queries/q17.sql similarity index 100% rename from queries/q17.sql.disabled rename to queries/q17.sql diff --git a/queries/q20.sql.disabled b/queries/q20.sql similarity index 100% rename from queries/q20.sql.disabled rename to queries/q20.sql diff --git a/scripts/mariadb.analyze.sql b/scripts/mariadb.analyze.sql new file mode 100644 index 0000000000000000000000000000000000000000..9009be719f61507e1f81c03a2f09d21fe6f8ece4 --- /dev/null +++ b/scripts/mariadb.analyze.sql @@ -0,0 +1,8 @@ +analyze table customer; +analyze table lineitem; +analyze table nation; +analyze table orders; +analyze table part; +analyze table partsupp; +analyze table region; +analyze table supplier; diff --git a/scripts/mariadb.constraints.sql b/scripts/mariadb.constraints.sql new file mode 100644 index 0000000000000000000000000000000000000000..1abb54faa4654f9c544df26e6180ab401fdea1b8 --- /dev/null +++ b/scripts/mariadb.constraints.sql @@ -0,0 +1,44 @@ +-- TPCH integry constraints for postgres 9.x +-- hannes@cwi.nl, 
2014-04-04 + +-- For table REGION +ALTER TABLE REGION ADD PRIMARY KEY (R_REGIONKEY); + +-- For table NATION +ALTER TABLE NATION ADD PRIMARY KEY (N_NATIONKEY); +ALTER TABLE NATION ADD CONSTRAINT NATION_FK1 FOREIGN KEY (N_REGIONKEY) REFERENCES REGION; + +-- For table PART +ALTER TABLE PART ADD PRIMARY KEY (P_PARTKEY); + +-- For table SUPPLIER +ALTER TABLE SUPPLIER ADD PRIMARY KEY (S_SUPPKEY); +ALTER TABLE SUPPLIER ADD CONSTRAINT SUPPLIER_FK1 FOREIGN KEY (S_NATIONKEY) REFERENCES NATION; + +-- For table PARTSUPP +ALTER TABLE PARTSUPP ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY); + +-- For table CUSTOMER +ALTER TABLE CUSTOMER ADD PRIMARY KEY (C_CUSTKEY); +ALTER TABLE CUSTOMER ADD CONSTRAINT CUSTOMER_FK1 FOREIGN KEY (C_NATIONKEY) REFERENCES NATION; + +-- For table LINEITEM +ALTER TABLE LINEITEM ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER); + +-- For table ORDERS +ALTER TABLE ORDERS ADD PRIMARY KEY (O_ORDERKEY); + +-- For table PARTSUPP +ALTER TABLE PARTSUPP ADD CONSTRAINT PARTSUPP_FK1 FOREIGN KEY (PS_SUPPKEY) REFERENCES SUPPLIER; +ALTER TABLE PARTSUPP ADD CONSTRAINT PARTSUPP_FK2 FOREIGN KEY (PS_PARTKEY) REFERENCES PART; + +-- For table ORDERS +ALTER TABLE ORDERS ADD CONSTRAINT ORDERS_FK1 FOREIGN KEY (O_CUSTKEY) REFERENCES CUSTOMER; + +-- For table LINEITEM +ALTER TABLE LINEITEM ADD CONSTRAINT LINEITEM_FK1 FOREIGN KEY (L_ORDERKEY) REFERENCES ORDERS; +ALTER TABLE LINEITEM ADD CONSTRAINT LINEITEM_FK2 FOREIGN KEY (L_PARTKEY,L_SUPPKEY) REFERENCES PARTSUPP; + + + + diff --git a/scripts/mariadb.load.sql b/scripts/mariadb.load.sql new file mode 100644 index 0000000000000000000000000000000000000000..51a5a33b2896fb579c1c8e384928ae6b5b331ee8 --- /dev/null +++ b/scripts/mariadb.load.sql @@ -0,0 +1,8 @@ +LOAD DATA INFILE 'DIR/customer.tbl' INTO TABLE customer FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/nation.tbl' INTO TABLE nation FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/orders.tbl' INTO TABLE orders FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/part.tbl' INTO TABLE part FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/partsupp.tbl' INTO TABLE partsupp FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/region.tbl' INTO TABLE region FIELDS TERMINATED BY '|'; +LOAD DATA INFILE 'DIR/supplier.tbl' INTO TABLE supplier FIELDS TERMINATED BY '|'; \ No newline at end of file diff --git a/scripts/mariadb.schema.sql b/scripts/mariadb.schema.sql new file mode 100644 index 0000000000000000000000000000000000000000..aed682ae7ea79aa99c7bea4c0e1b730a4234ef21 --- /dev/null +++ b/scripts/mariadb.schema.sql @@ -0,0 +1,72 @@ +-- TPCH schema postgres 9.x +-- hannes@cwi.nl, 2014-04-04 + +CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL, + N_NAME CHAR(25) NOT NULL, + N_REGIONKEY INTEGER NOT NULL, + N_COMMENT VARCHAR(152)); + +CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL, + R_NAME CHAR(25) NOT NULL, + R_COMMENT VARCHAR(152)); + +CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL, + P_NAME VARCHAR(55) NOT NULL, + P_MFGR CHAR(25) NOT NULL, + P_BRAND CHAR(10) NOT NULL, + P_TYPE VARCHAR(25) NOT NULL, + P_SIZE INTEGER NOT NULL, + P_CONTAINER CHAR(10) NOT NULL, + P_RETAILPRICE DECIMAL(15,2) NOT NULL, + P_COMMENT VARCHAR(23) NOT NULL ); + +CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL, + S_NAME CHAR(25) NOT NULL, + S_ADDRESS VARCHAR(40) NOT NULL, + S_NATIONKEY INTEGER NOT NULL, + S_PHONE CHAR(15) NOT NULL, + S_ACCTBAL DECIMAL(15,2) NOT NULL, + S_COMMENT VARCHAR(101) NOT NULL); + +CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL, + 
PS_SUPPKEY INTEGER NOT NULL, + PS_AVAILQTY INTEGER NOT NULL, + PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, + PS_COMMENT VARCHAR(199) NOT NULL ); + +CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL, + C_NAME VARCHAR(25) NOT NULL, + C_ADDRESS VARCHAR(40) NOT NULL, + C_NATIONKEY INTEGER NOT NULL, + C_PHONE CHAR(15) NOT NULL, + C_ACCTBAL DECIMAL(15,2) NOT NULL, + C_MKTSEGMENT CHAR(10) NOT NULL, + C_COMMENT VARCHAR(117) NOT NULL); + +CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL, + O_CUSTKEY INTEGER NOT NULL, + O_ORDERSTATUS CHAR(1) NOT NULL, + O_TOTALPRICE DECIMAL(15,2) NOT NULL, + O_ORDERDATE DATE NOT NULL, + O_ORDERPRIORITY CHAR(15) NOT NULL, + O_CLERK CHAR(15) NOT NULL, + O_SHIPPRIORITY INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL); + +CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL); +
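The MariaDB schema, load, and analyze scripts added above are consumed by ehannes.sh the same way as the PostgreSQL and MonetDB ones: the literal `DIR` token in `mariadb.load.sql` is rewritten to the scale-factor data directory before the file is fed to the client over the local socket. A condensed manual walk-through, assuming the datadir has already been initialized with `mysql_install_db` and `mysqld` is running, and using the script's own variables (`$MAINS`, `$DBSOCK`, `$SCDIR`, `$SFDDIR`, `$DIR`); note that the non-LOCAL `LOAD DATA INFILE` form is read by the server process, so the generated .tbl files must be readable by the mysqld user:

# instantiate the load script for this scale factor: DIR -> $SFDDIR
sed -e "s|DIR|$SFDDIR|" $SCDIR/mariadb.load.sql > $DIR/.mariadb.load.sql.local

# create the tpch database and its schema (CREATEDBCMD / CLIENTCMD in ehannes.sh)
$MAINS/bin/mysqladmin -u root --socket=$DBSOCK create tpch
$MAINS/bin/mysql -u root --socket=$DBSOCK tpch < $SCDIR/mariadb.schema.sql

# bulk-load the generated .tbl files, then collect optimizer statistics
$MAINS/bin/mysql -u root --socket=$DBSOCK tpch < $DIR/.mariadb.load.sql.local
$MAINS/bin/mysql -u root --socket=$DBSOCK tpch < $SCDIR/mariadb.analyze.sql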