Skip to content

Commit d0d426d

Browse files
committed
Make both EM portions run one at a time, with print statements; version 0.0.7
1 parent 4c7714b commit d0d426d

File tree

4 files changed

+39
-17
lines changed

4 files changed

+39
-17
lines changed

conda/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package:
22
name: lyceanem
3-
version: 0.0.6
3+
version: 0.0.7
44
source:
55
path: ..
66

lyceanem/electromagnetics/empropagation.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ def scatteringkernaltest(
549549
wavelength,
550550
)
551551
if temp == lengths:
552-
print("error", network_index[cu_ray_num, i], lengths)
552+
print("numba-cuda error", network_index[cu_ray_num, i], lengths)
553553

554554
# convert field amplitudes to tangential surface currents
555555
if (i < network_index.shape[1] - 1) and (
@@ -643,7 +643,7 @@ def scatteringkernalv3(
643643
sink_index = network_index[cu_ray_num, sink_test] - 1 - problem_size[0]
644644

645645
if flag == 0:
646-
print("error", cu_ray_num, sink_index)
646+
print("numba-cuda error", cu_ray_num, sink_index)
647647
# print(cu_ray_num,sink_index)
648648
# else:
649649
# sink_index=network_index[cu_ray_num,-1]-1-problem_size[0]
@@ -793,12 +793,19 @@ def lossy_propagation(point1, point2, alpha, beta):
793793
normal[2] = point2["nz"]
794794
projection_dot = dot_vec(outgoing_dir, normal)
795795
front = -(1 / (2 * cmath.pi))
796+
print("numba-cuda front", front)
797+
print("numba-cuda projection_dot", projection_dot)
798+
796799
s = 2.5
797800
distance_loss = 1.0 / ((1 + length[0] ** s) ** (1 / s))
798801
G = (cmath.exp(-(alpha[0] + 1j * beta[0]) * length[0])) * distance_loss
799802

800803
#dG = (-(alpha[0] + 1j * beta[0]) - complex64((distance_loss))) * G
801804
dG = (-(alpha[0] + 1j * beta[0])) * G
805+
print("numba-cuda dG", dG.real, dG.imag)
806+
print("numba-cuda G", G.real, G.imag)
807+
print("numba-cuda ray_direction", outgoing_dir[0], outgoing_dir[1], outgoing_dir[2])
808+
print("numba-cuda raylength", length[0])
802809
loss = front * dG * projection_dot
803810

804811
return loss
@@ -966,7 +973,7 @@ def scatteringkernaltest(
966973
sink_index = network_index[cu_ray_num, sink_test] - 1 - problem_size[0]
967974

968975
if flag == 0:
969-
print("error", cu_ray_num, sink_index)
976+
print("numba-cuda error", cu_ray_num, sink_index)
970977

971978
scattering_matrix[cu_ray_num] = complex(sink_index)
972979

@@ -1096,7 +1103,8 @@ def freqdomainkernal(
10961103
cu_ray_num = cuda.grid(1) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
10971104
# threadIdx.y + ( blockIdx.y * blockDim.y )
10981105
# margin=1e-5
1099-
if cu_ray_num < network_index.shape[0]:
1106+
stride = cuda.gridsize(1)
1107+
for i in range (cu_ray_num,network_index.shape[0],stride):
11001108
# noinspection PyTypeChecker
11011109
ray_component = cuda.local.array(shape=(3), dtype=np.complex128)
11021110
# ray_components[cu_ray_num,:]=0.0
@@ -1172,7 +1180,10 @@ def freqdomainkernal(
11721180
point_information[network_index[cu_ray_num, i + 1] - 1],
11731181
outgoing_dir,
11741182
)
1183+
print("numba-cuda ray_field pre launch",ray_component[0].real, "+", ray_component[0].imag, "i ",ray_component[1].real, "+", ray_component[1].imag, "i ",ray_component[2].real, "+", ray_component[2].imag, "i")
1184+
11751185
ray_component = sourcelaunchtransformGPU(ray_component, outgoing_dir)
1186+
print("numba-cuda rayfield post launch",ray_component[0].real, "+", ray_component[0].imag, "i ",ray_component[1].real, "+", ray_component[1].imag, "i ",ray_component[2].real, "+", ray_component[2].imag, "i")
11761187

11771188
ray_component[0] = (
11781189
ray_component[0]
@@ -1193,12 +1204,15 @@ def freqdomainkernal(
11931204
# scatter_coefficient=(1/(4*cmath.pi))**(complex(scatter_index))
11941205
# alpha = 0.0
11951206
# beta = (2.0 * cmath.pi) / wavelength[0]
1207+
print("numba-cuda alpha", alpha[0], "beta", beta[0])
1208+
11961209
loss = lossy_propagation(
11971210
point_information[network_index[cu_ray_num, 0] - 1],
11981211
point_information[network_index[cu_ray_num, 1] - 1],
11991212
alpha,
12001213
beta,
12011214
)
1215+
12021216
for i in range(1, network_index.shape[1] - 1):
12031217
if network_index[cu_ray_num, i + 1] != 0:
12041218

@@ -1208,10 +1222,12 @@ def freqdomainkernal(
12081222
alpha,
12091223
beta,
12101224
)
1225+
print("numba-cuda loss", loss.real, "+", loss.imag, "i")
12111226

12121227
ray_component[0] *= loss
12131228
ray_component[1] *= loss
12141229
ray_component[2] *= loss
1230+
print("numba-cuda ray_component after loss", ray_component[0].real, "+", ray_component[0].imag, "i ",ray_component[1].real, "+", ray_component[1].imag, "i ",ray_component[2].real, "+", ray_component[2].imag, "i")
12151231
# print(ray_component[0].real,ray_component[1].real,ray_component[2].real)
12161232
# add real components
12171233
cuda.atomic.add(
@@ -1852,7 +1868,7 @@ def pathlength(network_index, point_information, distances):
18521868
lengths,
18531869
)
18541870
if temp == lengths:
1855-
print("error", network_index[cu_ray_num, i], lengths)
1871+
print("numba-cuda error", network_index[cu_ray_num, i], lengths)
18561872

18571873
i += 1
18581874

@@ -2232,7 +2248,7 @@ def EMGPUFreqDomain(
22322248
)
22332249
if memory_requirements >= (0.95 * free_mem):
22342250
# chunking required
2235-
# print("Number of Chunks",np.ceil(memory_requirements/max_mem).astype(int)+1)
2251+
# print("numba-cuda Number of Chunks",np.ceil(memory_requirements/max_mem).astype(int)+1)
22362252
# create chunks based upon number of chunks required
22372253
num_chunks = np.ceil(memory_requirements / max_mem).astype(int) + 1
22382254
if num_chunks < 0:
@@ -2308,7 +2324,7 @@ def EMGPUFreqDomain(
23082324
# print(grids,' blocks, ',threads,' threads')
23092325
# Execute the kernel
23102326
# cuda.profile_start()
2311-
freqdomainkernal[grids, threads](
2327+
freqdomainkernal[1, 1](
23122328
d_temp_index,
23132329
d_point_information,
23142330
d_temp_target_index,
@@ -2377,7 +2393,7 @@ def EMGPUFreqDomain(
23772393
# print(grids,' blocks, ',threads,' threads')
23782394
# Execute the kernel
23792395
# cuda.profile_start()
2380-
freqdomainkernal[grids, threads](
2396+
freqdomainkernal[1, 1](
23812397
d_full_index,
23822398
d_point_information,
23832399
d_target_index,
@@ -2956,7 +2972,7 @@ def TimeDomainv3(
29562972
flag = True
29572973
if np.ceil(time_map.nbytes / 1e9) > 1:
29582974
# setup time_map chunking
2959-
print("source chunking ", time_map.nbytes / 1e9, "Gb")
2975+
print("numba-cuda source chunking ", time_map.nbytes / 1e9, "Gb")
29602976
num_chunks = np.ceil(time_map.nbytes / 1e9).astype(np.int32)
29612977
source_chunking = np.linspace(0, source_num, num_chunks + 1).astype(np.int32)
29622978
# setup wake time as a second
@@ -3212,7 +3228,7 @@ def TimeDomainThetaPhi(
32123228
flag = True
32133229
if np.ceil(time_map.nbytes / 1e9) > 1:
32143230
# setup time_map chunking
3215-
print("source chunking ", time_map.nbytes / 1e9, "Gb")
3231+
print("numba-cuda source chunking ", time_map.nbytes / 1e9, "Gb")
32163232
num_chunks = np.ceil(time_map.nbytes / 1e9).astype(np.int32)
32173233
source_chunking = np.linspace(0, source_num, num_chunks + 1).astype(np.int32)
32183234
# setup wake time as a second

lyceanem/src/em.cuh

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,15 @@ __device__ __inline__ complex_float3 ray_launch(const complex_float3 & e_field,f
6363

6464
__device__ __inline__ complex_float3 em_wave(const float2 alpha_beta, const float4& ray, const PointData& origin, const PointData& end, float wave_length) {
6565
//printf(("package- alpha_beta = (%f, %f)\n", alpha_beta.x, alpha_beta.y);
66-
// printf("package- ray field %f +i%f, %f +i%f, %f +i%f\n", origin.electric_field.x.x, origin.electric_field.x.y, origin.electric_field.y.x, origin.electric_field.y.y, origin.electric_field.z.x, origin.electric_field.z.y);
66+
printf("package- ray field %f +i%f, %f +i%f, %f +i%f\n", origin.electric_field.x.x, origin.electric_field.x.y, origin.electric_field.y.x, origin.electric_field.y.y, origin.electric_field.z.x, origin.electric_field.z.y);
6767

6868

6969

7070
complex_float3 ray_field = ray_launch(origin.electric_field, make_float3(ray.x, ray.y, ray.z));
71-
// printf("package- ray_field after launch = (%f + %fi, %f + %fi, %f + %fi)\n", ray_field.x.x, ray_field.x.y, ray_field.y.x, ray_field.y.y, ray_field.z.x, ray_field.z.y);
71+
printf("package- ray_field after launch = (%f + %fi, %f + %fi, %f + %fi)\n", ray_field.x.x, ray_field.x.y, ray_field.y.x, ray_field.y.y, ray_field.z.x, ray_field.z.y);
7272

7373

7474
float front = -(1 / (2 * CUDART_PI_F));
75-
//printf(("package- front = %f\n", front);
7675

7776
cuFloatComplex G;
7877
// printf("inputs to sincosf = (%f)\n", -alpha_beta.y * ray.w);
@@ -100,12 +99,19 @@ __device__ __inline__ complex_float3 em_wave(const float2 alpha_beta, const floa
10099
// printf("package- ray direction = (%f, %f, %f)\n", ray_dir.x, ray_dir.y, ray_dir.z);
101100

102101
float dot_val = dot(end.normal, ray_dir);
103-
// printf("package- dot(end.normal, ray_dir) = %f\n", dot_val);
102+
printf("front = %f\n", front);
103+
104+
printf(" dot(end.normal, ray_dir) = %f\n", dot_val);
105+
printf("dg = (%f + %fi)\n", dG.x, dG.y);
106+
printf("G = (%f + %fi)\n", G.x, G.y);
107+
printf("ray_dir = (%f, %f, %f)\n", ray_dir.x, ray_dir.y, ray_dir.z);
108+
printf("raylength = %f\n", ray.w);
104109
cuFloatComplex loss = front * dG * dot(end.normal, make_float3(ray.x, ray.y, ray.z));
105110
// printf("package- loss = (%f + %fi)\n", loss.x, loss.y);
106111

107112
ray_field *= loss;
108-
// printf("package- ray_field after loss = (%f + %fi, %f + %fi, %f + %fi)\n", ray_field.x.x, ray_field.x.y, ray_field.y.x, ray_field.y.y, ray_field.z.x, ray_field.z.y);
113+
114+
printf("ray_field after loss = (%f + %fi, %f + %fi, %f + %fi)\n", ray_field.x.x, ray_field.x.y, ray_field.y.x, ray_field.y.y, ray_field.z.x, ray_field.z.y);
109115

110116

111117
return ray_field;

lyceanem/src/raycasting_accelerated.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ void raycast_wrapper_tiles (float *source, float *end, int source_num, int end_n
567567
cudaMemset(d_scattering_network, 0, scattering_network_size);
568568
cudaDeviceSynchronize();
569569

570-
raycast_tiles<<<32,256>>>(d_source,d_end,d_ray,source_num,end_num,not_self_to_self,d_tri_vertex,d_binned_triangles,d_tri_num_per_bin,source_num*end_num, d_ray_index,num_bins,x_top_bottom,y_range,z_range,d_points,wave_length,d_scattering_network, alpha_beta);
570+
raycast_tiles<<<1,1,1>>>(d_source,d_end,d_ray,source_num,end_num,not_self_to_self,d_tri_vertex,d_binned_triangles,d_tri_num_per_bin,source_num*end_num, d_ray_index,num_bins,x_top_bottom,y_range,z_range,d_points,wave_length,d_scattering_network, alpha_beta);
571571
//get last error
572572

573573
gpuErrchk( cudaGetLastError() );

0 commit comments

Comments
 (0)