### Package
version       = "0.7.33"
author        = "Mamy André-Ratsimbazafy"
description   = "An n-dimensional tensor (ndarray) library"
license       = "Apache License 2.0"

### Dependencies
requires "nim >= 1.6.0",
  "nimblas >= 0.3.1",
  "nimlapack >= 0.3.1",
  "nimcuda >= 0.2.0",
  "nimcl >= 0.1.3",
  "clblast >= 0.0.2",
  "stb_image",
  "zip",
  "untar"

## Install files
srcDir = "src"
# for tests that require backward compatible RNG
from os import extractFilename
########################################################
# External libs configuration
### BLAS support
## OSX
# switch("define","openblas")
# switch("clibdir", "/usr/local/opt/openblas/lib")
# switch("cincludes", "/usr/local/opt/openblas/include")
## Archlinux
# Contrary to Debian-based distros, blas.so doesn't supply the cblas interface
# so "-d:blas=cblas" must be passed
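# For example (a sketch for such an Arch-like setup; paths and test target are
# only illustrative), a plain CPU test build can then be invoked as:
#   nim c -d:blas=cblas -r tests/tests_cpu.nim
# On Debian/Ubuntu the default libblas already exposes cblas, so no override is needed.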
### BLIS support
# switch("define","blis")
### MKL support
# Check the MKL switch templates below for the single-threaded and OpenMP versions
template mkl_threadedSwitches(switches: var string) =
  switches.add " --stackTrace:off"
  switches.add " --d:blas=mkl_intel_lp64"
  switches.add " --d:lapack=mkl_intel_lp64"
  switches.add " --clibdir:/opt/intel/mkl/lib/intel64"
  switches.add " --passl:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a"
  switches.add " --passl:-lmkl_core"
  switches.add " --passl:-lmkl_gnu_thread"
  switches.add " --passl:-lgomp"
  switches.add " --dynlibOverride:mkl_intel_lp64"

template mkl_singleSwitches(switches: var string) =
  switches.add " --d:blas=mkl_intel_lp64"
  switches.add " --d:lapack=mkl_intel_lp64"
  switches.add " --clibdir:/opt/intel/mkl/lib/intel64"
  switches.add " --passl:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a"
  switches.add " --passl:-lmkl_core"
  switches.add " --passl:-lmkl_sequential"
  switches.add " --dynlibOverride:mkl_intel_lp64"
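# Example use of these templates (a sketch of what the test_mkl and
# test_mkl_omp tasks further below do; MKL is assumed to be installed under
# /opt/intel/mkl as in the paths above):
#   var switches: string
#   switches.mkl_singleSwitches()
#   test "tests_cpu", switches
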
### Cuda configuration
## Pass -d:cuda to build arraymancer with cuda support
## Use the cuda switches below
## Replace /opt/cuda by your own path
## TODO: auto detection or at least check in common directories
## Note: It is important to gate compiler flags like -march=native behind -Xcompiler, i.e. "-Xcompiler -march=native"
# NVCC config
template cudaSwitches(switches: var string) =
  switches.add " --cincludes:/opt/cuda/include"
  switches.add " --cc:clang" # We trick Nim into treating nvcc as clang, pending https://github.com/nim-lang/Nim/issues/6372
  switches.add " --clang.exe:/opt/cuda/bin/nvcc"
  switches.add " --clang.linkerexe:/opt/cuda/bin/nvcc"
  switches.add " --clang.cpp.exe:/opt/cuda/bin/nvcc"
  switches.add " --clang.cpp.linkerexe:/opt/cuda/bin/nvcc"
  # Due to the __ldg intrinsics in kernels
  # we only support compute capabilities 3.5+
  # See here: http://docs.nvidia.com/cuda/pascal-compatibility-guide/index.html
  # And wikipedia for GPU capabilities: https://en.wikipedia.org/wiki/CUDA
  # Note: the switches below might conflict with nim.cfg
  # switches.add " --gcc.options.always:\"-arch=sm_61 --x cu\"" # Interpret .c files as .cu
  # switches.add " --gcc.cpp.options.always:\"-arch=sm_61 --x cu -Xcompiler -fpermissive\"" # Interpret .c files as .cu, gate fpermissive behind Xcompiler
  switches.add " -d:cudnn"
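
# Example use (a sketch mirroring the test_cuda task further below; the CUDA
# toolkit is assumed to live under /opt/cuda):
#   var switches = " -d:cuda -d:cudnn"
#   switches.cudaSwitches()
#   test "tests_cuda", switches, split = false, "cpp"
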
template cuda_mkl_openmp(switches: var string) =
  switches.mkl_threadedSwitches()
  switches.add " --cincludes:/opt/cuda/include"
  switches.add " --cc:clang" # We trick Nim into treating nvcc as clang, pending https://github.com/nim-lang/Nim/issues/6372
  switches.add " --clang.exe:/opt/cuda/bin/nvcc"
  switches.add " --clang.linkerexe:/opt/cuda/bin/nvcc"
  switches.add " --clang.cpp.exe:/opt/cuda/bin/nvcc"
  switches.add " --clang.cpp.linkerexe:/opt/cuda/bin/nvcc"
  # Note: the switches below might conflict with nim.cfg
  # switches.add " --gcc.options.always:\"-arch=sm_61 --x cu -Xcompiler -fopenmp -Xcompiler -march=native\""
  # switches.add " --gcc.cpp.options.always:\"-arch=sm_61 --x cu -Xcompiler -fopenmp -Xcompiler -march=native\""
# Clang config - make sure Clang supports your CUDA SDK version
# https://gist.github.com/ax3l/9489132
# https://llvm.org/docs/CompileCudaWithLLVM.html
# | clang++ | supported CUDA release | supported SMs |
# | ------- | ---------------------- | ------------- |
# | 3.9-5.0 | 7.0-8.0 | 2.0-(5.0)6.0 |
# | 6.0 | [7.0-9.0](https://github.com/llvm-mirror/clang/blob/release_60/include/clang/Basic/Cuda.h) | [(2.0)3.0-7.0](https://github.com/llvm-mirror/clang/blob/release_60/lib/Basic/Targets/NVPTX.cpp#L163-L188) |
# | 7.0 | [7.0-9.2](https://github.com/llvm-mirror/clang/blob/release_70/include/clang/Basic/Cuda.h) | [(2.0)3.0-7.2](https://github.com/llvm-mirror/clang/blob/release_70/lib/Basic/Targets/NVPTX.cpp#L196-L223) |
# | 8.0 | [7.0-10.0](https://github.com/llvm-mirror/clang/blob/release_80/include/clang/Basic/Cuda.h) | [(2.0)3.0-7.5](https://github.com/llvm-mirror/clang/blob/release_70/lib/Basic/Targets/NVPTX.cpp#L199-L228) |
# | trunk | [7.0-10.1](https://github.com/llvm-mirror/clang/blob/master/include/clang/Basic/Cuda.h) | [(2.0)3.0-7.5](https://github.com/llvm-mirror/clang/blob/master/lib/Basic/Targets/NVPTX.cpp#L200-L229) |
#
# template cudaSwitches(switches: var string) =
# switches.add " --cincludes:/opt/cuda/include"
# switches.add " --clibdir:/opt/cuda/lib"
# switches.add " --cc:clang"
# switches.add " --clang.cpp.options.always:\"--cuda-path=/opt/cuda -lcudart_static -x cuda --cuda-gpu-arch=sm_61 --cuda-gpu-arch=sm_75\""
# switches.add " -d:cudnn"
# template cuda_mkl_openmp(switches: var string) =
# switches.mkl_threadedSwitches()
# switches.add " --cincludes:/opt/cuda/include"
# switches.add " --clibdir:/opt/cuda/lib"
# switches.add " --cc:clang"
# switches.add " --clang.cpp.options.always:\"--cuda-path=/opt/cuda -lcudart_static -x cuda --cuda-gpu-arch=sm_61 --cuda-gpu-arch=sm_75 -fopenmp -march=native\""
# switches.add " -d:cudnn"
########################################################
# Optimization

# Multithreading
# use the -d:openmp switch,
# which passes -fopenmp to the C compiler (passC)

# Native processor optimization
# use the -d:native switch,
# which passes -march=native to the C compiler (passC)
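# For example, a release build using both defines could look like
# (a sketch; the output path and test target are only illustrative):
#   nim c -d:openmp -d:native -d:release -o:build/tests_cpu -r tests/tests_cpu.nim
# which ends up passing -fopenmp and -march=native to the C compiler.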
##########################################################################
## Testing tasks
proc test(name, switches = "", split = false, lang = "c") =
  ## Build and run tests/<name>.nim (or tests/_split_tests/<name>.nim when
  ## split is true), re-run the tests that need the legacy RNG, and compile
  ## the examples without running them.
  if not dirExists "build":
    mkDir "build"
  if not split:
    exec "nim " & lang & " -d:ssl -o:build/" & name & switches & " -r tests/" & name & ".nim"
  else:
    exec "nim " & lang & " -d:ssl -o:build/" & name & switches & " -r tests/_split_tests/" & name & ".nim"

  # run tests that require the old RNG for backward compatibility reasons
  let rngTests = ["spatial/test_kdtree",
                  "ml/test_clustering"]
  for t in rngTests:
    exec "nim " & lang & " -o:build/" & t.extractFilename & switches &
      " -d:nimLegacyRandomInitRand -r tests/" & $t & ".nim"

  # try to compile (not run) all examples to avoid regressions
  let examples = @["ex01_xor_perceptron_from_scratch.nim",
                   "ex02_handwritten_digits_recognition.nim",
                   "ex03_simple_two_layers.nim",
                   "ex04_fizzbuzz_interview_cheatsheet.nim",
                   "ex05_sequence_classification_GRU.nim",
                   "ex06_shakespeare_generator.nim",
                   "ex07_save_load_model.nim"]
  for ex in examples:
    exec "nim " & lang & " -o:build/" & name & switches & " examples/" & $ex
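
# Example invocation (a sketch; this is essentially what the test_release task
# further below boils down to):
#   test "tests_cpu", " -d:release"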

task all_tests, "Run all tests - Intel MKL + Cuda + OpenCL + OpenMP":
  var switches = " -d:cuda -d:opencl -d:openmp"
  switches.cuda_mkl_openmp()
  test "full_test_suite", switches, split=false, lang="cpp"

# Split tests are unnecessary after 1.0.0 (no more 3GB+ memory used when compiling)
#
# task test, "Run all tests - Default BLAS & Lapack":
# test "tests_tensor_part01", "", split = true
# test "tests_tensor_part02", "", split = true
# test "tests_tensor_part03", "", split = true
# test "tests_tensor_part04", "", split = true
# test "tests_tensor_part05", "", split = true
# test "tests_cpu_remainder", "", split = true
#
# task test_no_lapack, "Run all tests - Default BLAS without lapack":
# let switch = " -d:no_lapack"
# test "tests_tensor_part01", switch, split = true
# test "tests_tensor_part02", switch, split = true
# test "tests_tensor_part03", switch, split = true
# test "tests_tensor_part04", switch, split = true
# test "tests_tensor_part05", switch, split = true
# test "tests_cpu_remainder", switch, split = true
task test, "Run all tests - Default BLAS & Lapack":
  test "tests_cpu", "", split = false

task test_arc, "Run all tests under ARC - Default BLAS & Lapack":
  test "tests_cpu", " --gc:arc", split = false

task test_orc, "Run all tests under ORC - Default BLAS & Lapack":
  test "tests_cpu", " --gc:orc", split = false

task test_no_lapack, "Run all tests - Default BLAS without lapack":
  let switch = " -d:no_lapack"
  test "tests_cpu", switch, split = false

task test_no_lapack_arc, "Run all tests - Default BLAS without lapack under ARC":
  let switch = " -d:no_lapack --gc:arc"
  test "tests_cpu", switch, split = false

task test_cpp, "Run all tests - Cpp codegen":
  test "tests_cpu", "", split = false, "cpp"

task test_cuda, "Run all tests - Cuda backend with CUBLAS and CuDNN":
  var switches = " -d:cuda -d:cudnn"
  switches.add " -d:blas=cblas" # Archlinux, comment out on Debian/Ubuntu
  switches.cudaSwitches()
  test "tests_cuda", switches, split = false, "cpp"

task test_opencl, "Run all OpenCL backend tests":
  var switches = " -d:opencl"
  switches.add " -d:blas=cblas" # Archlinux, comment out on Debian/Ubuntu
  test "tests_opencl", switches, split = false, "cpp"

# task test_deprecated, "Run all tests on deprecated procs":
# test "tests_cpu_deprecated"
task test_openblas, "Run all tests - OpenBLAS":
  var switches = " -d:blas=openblas -d:lapack=openblas"
  when defined(macosx):
    ## Should work but somehow Nim doesn't find libopenblas.dylib on MacOS
    switches.add " --clibdir:/usr/local/opt/openblas/lib"
    switches.add " --cincludes:/usr/local/opt/openblas/include"
  test "tests_cpu", switches

task test_blis, "Run all tests - BLIS":
  test "tests_cpu", " -d:blis"

task test_native, "Run all tests - march=native":
  test "tests_cpu", " -d:native"

task test_openmp, "Run all tests - OpenMP":
  var switches = " -d:openmp"
  switches.add " --stackTrace:off" # stack traces interfere with OpenMP
  when defined(macosx): # The default compiler on macOS is clang, which ships without OpenMP, and gcc is an alias to clang.
    # Use Homebrew GCC instead for OpenMP support. GCC (v8) must be properly linked via `brew link gcc`
    switches.add " --cc:gcc"
    switches.add " --gcc.exe:/usr/local/bin/gcc-8"
    switches.add " --gcc.linkerexe:/usr/local/bin/gcc-8"
  test "tests_cpu", switches

task test_mkl, "Run all tests - Intel MKL - single threaded":
  var switches: string
  switches.mkl_singleSwitches()
  test "tests_cpu", switches

task test_mkl_omp, "Run all tests - Intel MKL + OpenMP":
  var switches = " -d:openmp"
  switches.mkl_threadedSwitches()
  test "tests_cpu", switches

task test_release, "Run all tests - Release mode":
  test "tests_cpu", " -d:release"

task test_arc_release, "Run all tests under ARC - Release mode":
  test "tests_cpu", " -d:release --gc:arc"

task test_orc_release, "Run all tests under ORC - Release mode":
  test "tests_cpu", " -d:release --gc:orc"

template canImport(x: untyped): untyped =
  compiles:
    import x

when canImport(docs / docs):
  # can define the `gen_docs` task (docs already imported now)
  # this is a hack around weird nimble + nimscript behavior:
  # when overwriting an install, nimble will try to parse the generated
  # nimscript file and for some reason will then be unable to import
  # the module (even if it's put into `src/`).
  task gen_docs, "Generate Arraymancer documentation":
    # generate the nimdoc.cfg file so we can generate the correct header for the
    # index.html page without having to mess with the HTML manually.
    genNimdocCfg("src/arraymancer/")
    # build the actual docs and the index
    buildDocs(
      "src/arraymancer/", "docs/build",
      defaultFlags = "--hints:off --warnings:off"
    )
    # Copy our stylesheets
    cpFile("docs/docutils.css", "docs/build/docutils.css")
    cpFile("docs/nav.css", "docs/build/nav.css")
    # Process the rst files
    for filePath in listFiles("docs/"):
      if filePath[^4..^1] == ".rst":
        let modName = filePath[5..^5]
        exec r"nim rst2html -o:docs/build/" & modName & ".html " & filePath