#!/bin/bash

# Runs SPLASH-2 benchmark programs natively and under Valgrind
# (--tool=none, --tool=drd, --tool=helgrind) and prints timing and virtual
# memory size figures for each configuration.

########################
# Function definitions #
########################

# Provides measure_runtime, avgstddev and print_runtime_ratio, which are used
# by run_test below.
source "$(dirname "$0")/measurement-functions"

#######################################
# Measure one benchmark natively and under several Valgrind tools.
#
# The program is first run natively with -p1, -p2 and -p4 through
# "measure_runtime ... | avgstddev"; the four fields read back (average time,
# its standard deviation, VSZ, its standard deviation) are printed. The -p1
# and -p4 figures are then handed to print_runtime_ratio together with the
# Valgrind command line to compare against.
#
# Globals read:
#   VG        - Valgrind launcher to run.
#   psep      - text placed between "-p" and the thread count. Expanded
#               unquoted on purpose: psep=' ' yields two words ("-p" "4"),
#               empty or unset yields one word ("-p4").
#   test_args - extra trailing argument for the program. It is always passed,
#               even when empty, exactly as the original code did.
# Globals set for callees:
#   test_output - per-run output file name, set only for the duration of each
#                 measure_runtime / print_runtime_ratio call.
# Arguments:
#   $1    - path of the benchmark executable (also the prefix of the
#           generated *.out file names).
#   $2... - benchmark arguments.
# Outputs:
#   "Average time: ..." lines on stdout, plus whatever print_runtime_ratio
#   prints, followed by an empty line.
# Exits:
#   With status 1 if avgstddev produced more than four fields.
#######################################
function run_test {
  local p stats rest
  local avg stddev vsz vszdev
  local -a base1 base4

  # Native runs. The statistics are captured with command substitution
  # instead of a predictable /tmp file, so nothing has to be cleaned up on
  # the error path.
  for p in 1 2 4; do
    stats=$(test_output="${1}-p${p}.out" \
              measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev)
    read -r avg stddev vsz vszdev rest <<< "${stats}"
    echo "Average time: ${avg} +/- ${stddev} seconds." \
         " VSZ: ${vsz} +/- ${vszdev} KB"

    if [ "${rest}" != "" ]; then
      echo "Internal error ($rest)"
      exit 1
    fi

    # Only the -p1 and -p4 figures serve as baselines below; the -p2 figures
    # are printed and then dropped.
    case "${p}" in
      1) base1=("${avg}" "${stddev}" "${vsz}" "${vszdev}") ;;
      4) base4=("${avg}" "${stddev}" "${vsz}" "${vszdev}") ;;
    esac
  done

  # Valgrind without instrumentation, one and four threads.
  p=1
  test_output="/dev/null" \
  print_runtime_ratio "${base1[@]}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  p=4
  test_output="/dev/null" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  # DRD, with and without checking of stack variables.
  test_output="${1}-drd-with-stack-var-4.out" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-without-stack-var-4.out" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  # Helgrind at each history level.
  test_output="${1}-helgrind-4-none.out" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=helgrind --history-level=none \
    "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4-approx.out" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=helgrind --history-level=approx \
    "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4-full.out" \
  print_runtime_ratio "${base4[@]}" \
    "$VG" --tool=helgrind --history-level=full \
    "$@" -p${psep}${p} "${test_args}"

  echo ''
}


########################
# Script body          #
########################

# Absolute path of the directory containing this script.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})."
  exit 1
fi

# VG may be preset by the caller; otherwise fall back to the in-tree launcher.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG."
  exit 1
fi

######################################################################################################################
# Meaning of the different columns:
#  1. SPLASH2 test name.
#  2. Execution time in seconds for native run with argument -p1.
#  3. Virtual memory size in KB for the native run with argument -p1.
#  4. Execution time in seconds for native run with argument -p2.
#  5. Virtual memory size in KB for the native run with argument -p2.
#  6. Execution time in seconds for native run with argument -p4.
#  7. Virtual memory size in KB for the native run with argument -p4.
#  8. Execution time ratio for --tool=none -p1 versus -p1.
#  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
# 10. Execution time ratio for --tool=none -p4 versus -p4.
# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 16. Execution time ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 17. Virtual memory size ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 18. Execution time ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 19. Virtual memory size ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 20. Execution time ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 21. Virtual memory size ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 22. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 23. Execution time ratio for Intel Thread Checker -p4 versus -p4.
#     NOTE(review): this wording duplicates column 22; the table header below
#     labels this column "-p4+f" (presumably ITC with filtering - confirm).
#
# Notes:
# - Both Helgrind and DRD use a granularity of one byte for data race detection.
# - Helgrind does detect data races on stack variables. DRD only detects
#   data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS ? Which
#   granularity does ITC use ? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives ?
#
# 1                     2      3    4      5    6      7    8     9   10   11   12   13   14   15   16   17   18   19   20   21   22    23
##########################################################################################################################################
# Results:                native      native      native       none      none       DRD       DRD        HG        HG        HG  ITC   ITC
#                            -p1         -p2         -p4        -p1       -p4       -p4     -p4+f       -p4       -p4       -p4  -p4 -p4+f
# ........................................................................................................................................
# Cholesky           0.11  12016 0.06  22016 0.55  41328 10.3  4.92  1.7 2.14   15 2.61    8 2.61   10 3.96   10 3.96   15 6.14  239    82
# FFT                0.02   6692 0.02  14888 0.02  31621 17.0  8.01 20.0 2.48  114 3.15   64 3.28   81 4.52   81 4.52  116 5.56   90    41
# LU, contiguous     0.08   4100 0.05  12304 0.06  28712 11.1 12.44 18.5 2.64  104 3.18   70 3.18   87 4.84   89 4.84  118 5.55  428   128
# Ocean, contiguous  0.23  16848 0.19  25384 0.23  42528  6.3  3.78  8.3 2.11   87 2.82   62 4.02   71 3.75   71 3.75  195 5.96   90    28
# Radix              0.21  15136 0.14  23336 0.15  39728 12.6  4.10 22.3 2.19   61 2.87   41 2.94   52 4.03   52 4.03   85 6.13  222    56
# Raytrace           0.63 207104 0.49 215296 0.49 231680  8.9  1.23 12.9 1.20  385 1.38   86 2.10  158 3.70  160 3.70  222 4.15  172    53
# Water-n2           0.18  10696 0.09  27072 0.11  59832 12.5  5.46 26.7 1.80 3092 3.03  263 3.06   92 3.28   92 3.28   92 3.55  189    39
# Water-sp           0.20   4444 0.15  13536 0.10  30269 10.6 11.56 27.0 2.52  405 3.29   69 3.42   95 4.59   95 4.59   97 4.73  183    34
# ........................................................................................................................................
# geometric mean     0.14  13024 0.10  25669 0.14  47655 10.8  5.26 13.5 2.08  161 2.71   59 3.03   66 4.05   66 4.05   95 5.13  180    51
# ........................................................................................................................................
# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
# Software: openSUSE 11.0 (64-bit edition), runlevel 3, kernel 2.6.30.1, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10648.
##########################################################################################################################################

####
# Notes:
# - The ITC performance numbers in the above table originate from table 1 in
#   the following paper:
#   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
#   Accurate and efficient filtering for the Intel thread checker race
#   detector, Proceedings of the 1st workshop on Architectural and system
#   support for improving software dependability, San Jose, California,
#   2006. Pages: 34 - 41.
# - The input parameters for benchmarks below originate from table 1 in the
#   following paper:
#   The SPLASH-2 programs: characterization and methodological considerations
#   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
#   1995. Proceedings of the 22nd Annual International Symposium on Computer
#   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
#   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
####

# get_cache_size and log2 are not defined in this file; presumably they come
# from the sourced measurement-functions file.
cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")

# Each benchmark that needs a specific working directory runs in a subshell,
# so neither the directory change nor an "exit" on failure affects the
# benchmarks that follow.

# Cholesky
(
  # Without this check a failed cd would decompress files in whatever
  # directory the script happened to be started from.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Match "*.Z" rather than "*Z": for a name ending in "Z" without the dot,
  # ${f%.Z} equals $f and the redirection would truncate the input file.
  for f in *.Z; do
    [ -e "$f" ] || continue   # glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The command substitution is quoted so that an empty "uname -p" result
# compares as a string instead of breaking the test expression.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024

# Raytrace
(
  # The rm below uses globs relative to the current directory, so it must not
  # run unless the cd succeeded.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  rm -f *.env *.geo *.rl
  for f in *.Z; do
    [ -e "$f" ] || continue   # glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)

# Water-n2
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  # test_input is not read in this file; presumably measurement-functions
  # feeds it to the program as standard input.
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)



# Local variables:
# compile-command: "./run-splash2"
# End: