@@ -549,7 +549,7 @@ def scatteringkernaltest(
549549 wavelength ,
550550 )
551551 if temp == lengths :
552- print ("error" , network_index [cu_ray_num , i ], lengths )
552+ print ("numba-cuda error" , network_index [cu_ray_num , i ], lengths )
553553
554554 # convert field amplitudes to tangential surface currents
555555 if (i < network_index .shape [1 ] - 1 ) and (
@@ -643,7 +643,7 @@ def scatteringkernalv3(
643643 sink_index = network_index [cu_ray_num , sink_test ] - 1 - problem_size [0 ]
644644
645645 if flag == 0 :
646- print ("error" , cu_ray_num , sink_index )
646+ print ("numba-cuda error" , cu_ray_num , sink_index )
647647 # print(cu_ray_num,sink_index)
648648 # else:
649649 # sink_index=network_index[cu_ray_num,-1]-1-problem_size[0]
@@ -793,12 +793,19 @@ def lossy_propagation(point1, point2, alpha, beta):
793793 normal [2 ] = point2 ["nz" ]
794794 projection_dot = dot_vec (outgoing_dir , normal )
795795 front = - (1 / (2 * cmath .pi ))
796+ print ("numba-cuda front" , front )
797+ print ("numba-cuda projection_dot" , projection_dot )
798+
796799 s = 2.5
797800 distance_loss = 1.0 / ((1 + length [0 ] ** s ) ** (1 / s ))
798801 G = (cmath .exp (- (alpha [0 ] + 1j * beta [0 ]) * length [0 ])) * distance_loss
799802
800803 #dG = (-(alpha[0] + 1j * beta[0]) - complex64((distance_loss))) * G
801804 dG = (- (alpha [0 ] + 1j * beta [0 ])) * G
805+ print ("numba-cuda dG" , dG .real , dG .imag )
806+ print ("numba-cuda G" , G .real , G .imag )
807+ print ("numba-cuda ray_direction" , outgoing_dir [0 ], outgoing_dir [1 ], outgoing_dir [2 ])
808+ print ("numba-cuda raylength" , length [0 ])
802809 loss = front * dG * projection_dot
803810
804811 return loss
@@ -966,7 +973,7 @@ def scatteringkernaltest(
966973 sink_index = network_index [cu_ray_num , sink_test ] - 1 - problem_size [0 ]
967974
968975 if flag == 0 :
969- print ("error" , cu_ray_num , sink_index )
976+ print ("numba-cuda error" , cu_ray_num , sink_index )
970977
971978 scattering_matrix [cu_ray_num ] = complex (sink_index )
972979
@@ -1096,7 +1103,8 @@ def freqdomainkernal(
10961103 cu_ray_num = cuda .grid (1 ) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
10971104 # i.e. the 1-D global thread index; cuda.grid(1) has no y component
10981105 # margin=1e-5
1099- if cu_ray_num < network_index .shape [0 ]:
1106+ stride = cuda .gridsize (1 )
1107+ for i in range (cu_ray_num ,network_index .shape [0 ],stride ):
11001108 # noinspection PyTypeChecker
11011109 ray_component = cuda .local .array (shape = (3 ), dtype = np .complex128 )
11021110 # ray_components[cu_ray_num,:]=0.0
@@ -1172,7 +1180,10 @@ def freqdomainkernal(
11721180 point_information [network_index [cu_ray_num , i + 1 ] - 1 ],
11731181 outgoing_dir ,
11741182 )
1183+ print ("numba-cuda ray_field pre launch" ,ray_component [0 ].real , "+" , ray_component [0 ].imag , "i " ,ray_component [1 ].real , "+" , ray_component [1 ].imag , "i " ,ray_component [2 ].real , "+" , ray_component [2 ].imag , "i" )
1184+
11751185 ray_component = sourcelaunchtransformGPU (ray_component , outgoing_dir )
1186+ print ("numba-cuda rayfield post launch" ,ray_component [0 ].real , "+" , ray_component [0 ].imag , "i " ,ray_component [1 ].real , "+" , ray_component [1 ].imag , "i " ,ray_component [2 ].real , "+" , ray_component [2 ].imag , "i" )
11761187
11771188 ray_component [0 ] = (
11781189 ray_component [0 ]
@@ -1193,12 +1204,15 @@ def freqdomainkernal(
11931204 # scatter_coefficient=(1/(4*cmath.pi))**(complex(scatter_index))
11941205 # alpha = 0.0
11951206 # beta = (2.0 * cmath.pi) / wavelength[0]
1207+ print ("numba-cuda alpha" , alpha [0 ], "beta" , beta [0 ])
1208+
11961209 loss = lossy_propagation (
11971210 point_information [network_index [cu_ray_num , 0 ] - 1 ],
11981211 point_information [network_index [cu_ray_num , 1 ] - 1 ],
11991212 alpha ,
12001213 beta ,
12011214 )
1215+
12021216 for i in range (1 , network_index .shape [1 ] - 1 ):
12031217 if network_index [cu_ray_num , i + 1 ] != 0 :
12041218
@@ -1208,10 +1222,12 @@ def freqdomainkernal(
12081222 alpha ,
12091223 beta ,
12101224 )
1225+ print ("numba-cuda loss" , loss .real , "+" , loss .imag , "i" )
12111226
12121227 ray_component [0 ] *= loss
12131228 ray_component [1 ] *= loss
12141229 ray_component [2 ] *= loss
1230+ print ("numba-cuda ray_component after loss" , ray_component [0 ].real , "+" , ray_component [0 ].imag , "i " ,ray_component [1 ].real , "+" , ray_component [1 ].imag , "i " ,ray_component [2 ].real , "+" , ray_component [2 ].imag , "i" )
12151231 # print(ray_component[0].real,ray_component[1].real,ray_component[2].real)
12161232 # add real components
12171233 cuda .atomic .add (
@@ -1852,7 +1868,7 @@ def pathlength(network_index, point_information, distances):
18521868 lengths ,
18531869 )
18541870 if temp == lengths :
1855- print ("error" , network_index [cu_ray_num , i ], lengths )
1871+ print ("numba-cuda error" , network_index [cu_ray_num , i ], lengths )
18561872
18571873 i += 1
18581874
@@ -2232,7 +2248,7 @@ def EMGPUFreqDomain(
22322248 )
22332249 if memory_requirements >= (0.95 * free_mem ):
22342250 # chunking required
2235- # print("Number of Chunks",np.ceil(memory_requirements/max_mem).astype(int)+1)
2251+ # print("numba-cuda Number of Chunks",np.ceil(memory_requirements/max_mem).astype(int)+1)
22362252 # create chunks based upon number of chunks required
22372253 num_chunks = np .ceil (memory_requirements / max_mem ).astype (int ) + 1
22382254 if num_chunks < 0 :
@@ -2308,7 +2324,7 @@ def EMGPUFreqDomain(
23082324 # print(grids,' blocks, ',threads,' threads')
23092325 # Execute the kernel
23102326 # cuda.profile_start()
2311- freqdomainkernal [grids , threads ](
2327+ freqdomainkernal [1 , 1 ](
23122328 d_temp_index ,
23132329 d_point_information ,
23142330 d_temp_target_index ,
@@ -2377,7 +2393,7 @@ def EMGPUFreqDomain(
23772393 # print(grids,' blocks, ',threads,' threads')
23782394 # Execute the kernel
23792395 # cuda.profile_start()
2380- freqdomainkernal [grids , threads ](
2396+ freqdomainkernal [1 , 1 ](
23812397 d_full_index ,
23822398 d_point_information ,
23832399 d_target_index ,
@@ -2956,7 +2972,7 @@ def TimeDomainv3(
29562972 flag = True
29572973 if np .ceil (time_map .nbytes / 1e9 ) > 1 :
29582974 # setup time_map chunking
2959- print ("source chunking " , time_map .nbytes / 1e9 , "Gb" )
2975+ print ("numba-cuda source chunking " , time_map .nbytes / 1e9 , "Gb" )
29602976 num_chunks = np .ceil (time_map .nbytes / 1e9 ).astype (np .int32 )
29612977 source_chunking = np .linspace (0 , source_num , num_chunks + 1 ).astype (np .int32 )
29622978 # setup wake time as a second
@@ -3212,7 +3228,7 @@ def TimeDomainThetaPhi(
32123228 flag = True
32133229 if np .ceil (time_map .nbytes / 1e9 ) > 1 :
32143230 # setup time_map chunking
3215- print ("source chunking " , time_map .nbytes / 1e9 , "Gb" )
3231+ print ("numba-cuda source chunking " , time_map .nbytes / 1e9 , "Gb" )
32163232 num_chunks = np .ceil (time_map .nbytes / 1e9 ).astype (np .int32 )
32173233 source_chunking = np .linspace (0 , source_num , num_chunks + 1 ).astype (np .int32 )
32183234 # setup wake time as a second
0 commit comments