@@ -220,13 +220,26 @@ mod gpu {
220
220
221
221
#[ cfg( feature = "hip" ) ]
222
222
let mut gpu_target = {
223
+ const DEFAULT_HIP_ARCHES : & [ & str ] = & [ "gfx90a" ] ;
224
+
223
225
let hip_path = hip_sys:: hiprt:: get_hip_path ( ) ;
226
+ if !hip_path. exists ( ) {
227
+ panic ! ( "Couldn't find HIP path at {}" , hip_path. display( ) ) ;
228
+ }
229
+
224
230
// It seems that various ROCm releases change where hipcc is...
225
231
let mut compiler = hip_path. join ( "bin/hipcc" ) ;
226
232
if !compiler. exists ( ) {
227
233
// Try the dir above, which might be the ROCm dir.
228
234
compiler = hip_path. join ( "../bin/hipcc" ) ;
229
235
}
236
+ if !compiler. exists ( ) {
237
+ panic ! (
238
+ "Couldn't find hipcc in {}/bin/hipcc or {}" ,
239
+ hip_path. display( ) ,
240
+ compiler. display( )
241
+ ) ;
242
+ }
230
243
let mut hip_target = cc:: Build :: new ( ) ;
231
244
hip_target
232
245
. compiler ( compiler)
@@ -235,6 +248,34 @@ mod gpu {
235
248
. file ( "src/fee/gpu/fee.cu" )
236
249
. file ( "src/analytic/gpu/analytic.cu" ) ;
237
250
251
+ hip_target. flag ( "-O0" ) ; // <- hip can't handle optimizations
252
+
253
+ println ! ( "cargo:rerun-if-env-changed=HYPERBEAM_HIP_ARCH" ) ;
254
+ println ! ( "cargo:rerun-if-env-changed=HYPERDRIVE_HIP_ARCH" ) ;
255
+ let arches: Vec < String > = match (
256
+ env:: var ( "HYPERBEAM_HIP_ARCH" ) ,
257
+ env:: var ( "HYPERDRIVE_HIP_ARCH" ) ,
258
+ ) {
259
+ // When a user-supplied variable exists, pass it verbatim as the single
260
+ // HIP `--offload-arch` value (HYPERBEAM_HIP_ARCH takes precedence).
261
+ ( Ok ( c) , _) | ( Err ( _) , Ok ( c) ) => {
262
+ vec ! [ c]
263
+ }
264
+ _ => {
265
+ // Neither variable is set: report the default HIP arches as a cargo
266
+ // warning and fall back to them.
267
+ println ! ( "cargo:warning=No HYPERBEAM_HIP_ARCH; Passing --offload-arch={DEFAULT_HIP_ARCHES:?} to hip" ) ;
268
+ DEFAULT_HIP_ARCHES
269
+ . iter ( )
270
+ . map ( |& s| String :: from ( s) )
271
+ . collect ( )
272
+ }
273
+ } ;
274
+
275
+ for arch in arches {
276
+ hip_target. flag ( & format ! ( "--offload-arch={arch}" ) ) ;
277
+ }
278
+
238
279
hip_target
239
280
} ;
240
281
0 commit comments