diff --git a/Changelog b/Changelog index 5dac3b8..4b82056 100644 --- a/Changelog +++ b/Changelog @@ -1,4 +1,8 @@ +1.2.0 +----- +* Kernel parameters are now autotuned (refer to user manual) * Added benchmark support for scan +* Fixed sorting in benchmark tool to support 3-element value types * Improved robustness to non-default locale * Added --split-debug and --variant=symbols configuration options diff --git a/README b/README index e8f0552..feaac56 100644 --- a/README +++ b/README @@ -6,6 +6,7 @@ For the impatient, the process is $ ./waf configure [ --prefix=install-path ] $ ./waf build $ sudo ./waf install +$ clogs-tune which will build and install to the install path (defaults to /usr/local), and which will install the documentation in /usr/local/share/doc/clogs diff --git a/doc/RELEASE-PROCESS b/doc/RELEASE-PROCESS index 5fa1cd8..f792e68 100644 --- a/doc/RELEASE-PROCESS +++ b/doc/RELEASE-PROCESS @@ -3,6 +3,7 @@ - Update the Changelog - Nuke old versions: rm -rf /usr/local/share/doc/clogs - Run './waf dist' and check that the new version can install cleanly +- Rerun autotuning (first nuke ~/.clogs) - Run new benchmarks - Rebuild the documentation - Upload new documentation to Sourceforge page (install it first): diff --git a/doc/benchmark/1.2.0/480gtx/uint-uint.txt b/doc/benchmark/1.2.0/480gtx/uint-uint.txt new file mode 100644 index 0000000..9ca40a6 --- /dev/null +++ b/doc/benchmark/1.2.0/480gtx/uint-uint.txt @@ -0,0 +1,15 @@ +1000 2.67093 +2000 5.31897 +5000 12.8816 +10000 25.0811 +20000 46.2133 +50000 90.1553 +100000 132.811 +200000 170.806 +500000 162.346 +1000000 233.661 +2000000 277.364 +5000000 400.125 +10000000 399.896 +20000000 421.69 +50000000 441.952 diff --git a/doc/benchmark/1.2.0/480gtx/uint-void.txt b/doc/benchmark/1.2.0/480gtx/uint-void.txt new file mode 100644 index 0000000..24cfe16 --- /dev/null +++ b/doc/benchmark/1.2.0/480gtx/uint-void.txt @@ -0,0 +1,15 @@ +1000 4.60812 +2000 9.03748 +5000 20.734 +10000 35.8347 +20000 54.7197 +50000 83.8023 +100000 89.9868 +200000 97.9026 +500000 162.255 +1000000 256.118 +2000000 323.597 +5000000 444.303 +10000000 504.673 +20000000 545.87 +50000000 571.253 diff --git a/doc/benchmark/1.2.0/480gtx/ulong-float4.txt b/doc/benchmark/1.2.0/480gtx/ulong-float4.txt new file mode 100644 index 0000000..250cde9 --- /dev/null +++ b/doc/benchmark/1.2.0/480gtx/ulong-float4.txt @@ -0,0 +1,13 @@ +1000 2.36996 +2000 4.52886 +5000 10.2097 +10000 16.9437 +20000 24.1824 +50000 41.5291 +100000 57.4086 +200000 75.9464 +500000 101.052 +1000000 112.921 +2000000 122.339 +5000000 127.519 +10000000 129.692 diff --git a/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg b/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg new file mode 100644 index 0000000..33402ef --- /dev/null +++ b/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg @@ -0,0 +1,242 @@ + + + + +Produced by GNUPLOT 4.4 patchlevel 3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + 100 + + + + + + + + + + 200 + + + + + + + + + + 300 + + + + + + + + + + 400 + + + + + + + + + + 500 + + + + + + + + + + 600 + + + + 100 + + + + 1000 + + + + 10000 + + + + 100000 + + + + 1e+06 + + + + 1e+07 + + + + 1e+08 + + + + MKeys/s + + + Elements + + + CLOGS sorting rate on GeForce 480 GTX + + + + + + uint keys / no values + + + + + + + + + + + + + + + + + + + + + + + + uint keys / uint values + + + + + + + + + + + + + + + + + + + + + + + + ulong keys / float4 values + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/benchmark/clogs-benchmark-plot.sh b/doc/benchmark/clogs-benchmark-plot.sh index 42a1038..3cfa011 100755 --- a/doc/benchmark/clogs-benchmark-plot.sh +++ b/doc/benchmark/clogs-benchmark-plot.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Copyright (c) 2012 University of Cape Town +# Copyright (c) 2012-2013 University of Cape Town # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,3 +22,4 @@ gnuplot -e "in='1.0.0/480gtx/'; out='1.0.0/clogs-benchmark-480gtx.svg'; device='GeForce 480 GTX'" clogs-benchmark.plot gnuplot -e "in='1.0.2/hd6790/'; out='1.0.2/clogs-benchmark-hd6790.svg'; device='Radeon HD 6790'" clogs-benchmark.plot +gnuplot -e "in='1.2.0/480gtx/'; out='1.2.0/clogs-benchmark-480gtx.svg'; device='GeForce 480 GTX'" clogs-benchmark.plot diff --git a/doc/benchmark/clogs-benchmark-run.sh b/doc/benchmark/clogs-benchmark-run.sh index 05630fb..5060719 100755 --- a/doc/benchmark/clogs-benchmark-run.sh +++ b/doc/benchmark/clogs-benchmark-run.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Copyright (c) 2012 University of Cape Town +# Copyright (c) 2012-2013 University of Cape Town # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +export LC_ALL=C for i in 1000 2000 5000 10000 20000 50000 100000 200000 500000 1000000 2000000 5000000 10000000 20000000 50000000; do - echo -n "$i "; clogs-benchmark "$@" --iterations 50 --items $i | tail -n 1 | sed 's/.* \(.*\)M.s/\1/' + echo -n "$i "; clogs-benchmark "$@" --iterations 50 --items $i --cl-gpu | tail -n 1 | sed 's/.* \(.*\)M.s/\1/' done diff --git a/doc/clogs-user.xml b/doc/clogs-user.xml index a4a733c..310193f 100644 --- a/doc/clogs-user.xml +++ b/doc/clogs-user.xml @@ -418,7 +418,8 @@ sorter.enqueue(queue, keys, values, numElements, 20, &wait, &event); - The performance on AMD GPUs will need a lot of work: + The performance on AMD GPUs still needs work. This graph is for 1.0.x, so + performance is expected to be better now but it has not been measured. diff --git a/wscript b/wscript index c43518f..1adc44d 100644 --- a/wscript +++ b/wscript @@ -27,7 +27,7 @@ from waflib.TaskGen import feature, after_method from waflib import Task APPNAME = 'clogs' -VERSION = '1.1.0' +VERSION = '1.2.0' out = 'build' variants = { @@ -243,7 +243,7 @@ def build(bld): ) bld( rule = simple_copy, - source = 'doc/benchmark/1.0.0/clogs-benchmark-480gtx.svg', + source = 'doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg', target = 'doc/images/clogs-benchmark-480gtx.svg' ) bld(