diff --git a/Changelog b/Changelog
index 5dac3b8..4b82056 100644
--- a/Changelog
+++ b/Changelog
@@ -1,4 +1,8 @@
+1.2.0
+-----
+* Kernel parameters are now autotuned (refer to the user manual)
* Added benchmark support for scan
+* Fixed sorting in benchmark tool to support 3-element value types
* Improved robustness to non-default locale
* Added --split-debug and --variant=symbols configuration options
diff --git a/README b/README
index e8f0552..feaac56 100644
--- a/README
+++ b/README
@@ -6,6 +6,7 @@ For the impatient, the process is
$ ./waf configure [ --prefix=install-path ]
$ ./waf build
$ sudo ./waf install
+$ clogs-tune
which will build and install to the install path (defaults to /usr/local),
and which will install the documentation in /usr/local/share/doc/clogs
diff --git a/doc/RELEASE-PROCESS b/doc/RELEASE-PROCESS
index 5fa1cd8..f792e68 100644
--- a/doc/RELEASE-PROCESS
+++ b/doc/RELEASE-PROCESS
@@ -3,6 +3,7 @@
- Update the Changelog
- Nuke old versions: rm -rf /usr/local/share/doc/clogs
- Run './waf dist' and check that the new version can install cleanly
+- Rerun autotuning (first nuke ~/.clogs)
- Run new benchmarks
- Rebuild the documentation
- Upload new documentation to Sourceforge page (install it first):
diff --git a/doc/benchmark/1.2.0/480gtx/uint-uint.txt b/doc/benchmark/1.2.0/480gtx/uint-uint.txt
new file mode 100644
index 0000000..9ca40a6
--- /dev/null
+++ b/doc/benchmark/1.2.0/480gtx/uint-uint.txt
@@ -0,0 +1,15 @@
+1000 2.67093
+2000 5.31897
+5000 12.8816
+10000 25.0811
+20000 46.2133
+50000 90.1553
+100000 132.811
+200000 170.806
+500000 162.346
+1000000 233.661
+2000000 277.364
+5000000 400.125
+10000000 399.896
+20000000 421.69
+50000000 441.952
diff --git a/doc/benchmark/1.2.0/480gtx/uint-void.txt b/doc/benchmark/1.2.0/480gtx/uint-void.txt
new file mode 100644
index 0000000..24cfe16
--- /dev/null
+++ b/doc/benchmark/1.2.0/480gtx/uint-void.txt
@@ -0,0 +1,15 @@
+1000 4.60812
+2000 9.03748
+5000 20.734
+10000 35.8347
+20000 54.7197
+50000 83.8023
+100000 89.9868
+200000 97.9026
+500000 162.255
+1000000 256.118
+2000000 323.597
+5000000 444.303
+10000000 504.673
+20000000 545.87
+50000000 571.253
diff --git a/doc/benchmark/1.2.0/480gtx/ulong-float4.txt b/doc/benchmark/1.2.0/480gtx/ulong-float4.txt
new file mode 100644
index 0000000..250cde9
--- /dev/null
+++ b/doc/benchmark/1.2.0/480gtx/ulong-float4.txt
@@ -0,0 +1,13 @@
+1000 2.36996
+2000 4.52886
+5000 10.2097
+10000 16.9437
+20000 24.1824
+50000 41.5291
+100000 57.4086
+200000 75.9464
+500000 101.052
+1000000 112.921
+2000000 122.339
+5000000 127.519
+10000000 129.692
diff --git a/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg b/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg
new file mode 100644
index 0000000..33402ef
--- /dev/null
+++ b/doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg
@@ -0,0 +1,242 @@
+
+
+
+
diff --git a/doc/benchmark/clogs-benchmark-plot.sh b/doc/benchmark/clogs-benchmark-plot.sh
index 42a1038..3cfa011 100755
--- a/doc/benchmark/clogs-benchmark-plot.sh
+++ b/doc/benchmark/clogs-benchmark-plot.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-# Copyright (c) 2012 University of Cape Town
+# Copyright (c) 2012-2013 University of Cape Town
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -22,3 +22,4 @@
gnuplot -e "in='1.0.0/480gtx/'; out='1.0.0/clogs-benchmark-480gtx.svg'; device='GeForce 480 GTX'" clogs-benchmark.plot
gnuplot -e "in='1.0.2/hd6790/'; out='1.0.2/clogs-benchmark-hd6790.svg'; device='Radeon HD 6790'" clogs-benchmark.plot
+gnuplot -e "in='1.2.0/480gtx/'; out='1.2.0/clogs-benchmark-480gtx.svg'; device='GeForce 480 GTX'" clogs-benchmark.plot
diff --git a/doc/benchmark/clogs-benchmark-run.sh b/doc/benchmark/clogs-benchmark-run.sh
index 05630fb..5060719 100755
--- a/doc/benchmark/clogs-benchmark-run.sh
+++ b/doc/benchmark/clogs-benchmark-run.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-# Copyright (c) 2012 University of Cape Town
+# Copyright (c) 2012-2013 University of Cape Town
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,6 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+export LC_ALL=C
for i in 1000 2000 5000 10000 20000 50000 100000 200000 500000 1000000 2000000 5000000 10000000 20000000 50000000; do
- echo -n "$i "; clogs-benchmark "$@" --iterations 50 --items $i | tail -n 1 | sed 's/.* \(.*\)M.s/\1/'
+ echo -n "$i "; clogs-benchmark "$@" --iterations 50 --items $i --cl-gpu | tail -n 1 | sed 's/.* \(.*\)M.s/\1/'
done
diff --git a/doc/clogs-user.xml b/doc/clogs-user.xml
index a4a733c..310193f 100644
--- a/doc/clogs-user.xml
+++ b/doc/clogs-user.xml
@@ -418,7 +418,8 @@ sorter.enqueue(queue, keys, values, numElements, 20, &wait, &event);
- The performance on AMD GPUs will need a lot of work:
+ The performance on AMD GPUs still needs work. This graph is for 1.0.x, so
+ performance is expected to be better now, but it has not been measured.
diff --git a/wscript b/wscript
index c43518f..1adc44d 100644
--- a/wscript
+++ b/wscript
@@ -27,7 +27,7 @@ from waflib.TaskGen import feature, after_method
from waflib import Task
APPNAME = 'clogs'
-VERSION = '1.1.0'
+VERSION = '1.2.0'
out = 'build'
variants = {
@@ -243,7 +243,7 @@ def build(bld):
)
bld(
rule = simple_copy,
- source = 'doc/benchmark/1.0.0/clogs-benchmark-480gtx.svg',
+ source = 'doc/benchmark/1.2.0/clogs-benchmark-480gtx.svg',
target = 'doc/images/clogs-benchmark-480gtx.svg'
)
bld(