forked from AMSC-24-25/20-fft-20-fft
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sequential-1d-fast-ft.cpp
161 lines (148 loc) · 8.3 KB
/
sequential-1d-fast-ft.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include "fourier-transform-solver/sequential-1d-fast-ft.hpp"
#include "utils/bit-reversal.hpp"
#include <chrono>
void Sequential1DFastFT::compute() {
/**
* Sequential Cooley-Tukey Fast Fourier Transform (FFT) Algorithm.
* The algorithm is well commented in the code below.
* If you are looking for an uncommented version, please check the parallel version.
*/
printf("Running the Sequential Fast Fourier Transform solver...\n");
// 0. Prepare the variables.
// - get the size of the input vector
const size_t N = input.size();
/**
* - get the log2(N) value of the input vector,
* this because the algorithm iterates log2(N) times;
* because the divide-and-conquer algorithm splits the input vector
* in half at each iteration until it reaches the base case of 1 element
*/
const auto log2N = static_cast<size_t>(std::log2(N));
/**
* - the angle used in the algorithm;
* it is related to the complex exponential used to compute the FFT;
* since we need to compute the twiddle factors,
* we can precompute the angle value and "cache" it;
* the negative sign indicates the direction of the rotation in the complex plane,
* aligning with the definition of the DFT (Discrete Fourier Transform);
* remark: the twiddle factor is the exponential term in the DFT formula,
* and it represents a complex exponential term that "twiddles" or manipulates
* the phase of the input signals during the FFT computation;
* twiddle factor is: e^(-2 * pi * i * k * n / N), where:
* - i is the imaginary unit,
* - k is the current output index,
* - n is the input index,
* - N is the total number of points.
*/
constexpr double angle = -2 * M_PI;
// - save time before starting the computation (performance measurement)
const std::chrono::time_point<std::chrono::system_clock> start_time = std::chrono::high_resolution_clock::now();
// 1. Bit-Reversal Permutation.
sequentialBitReversal(input);
/**
* 2. Iterative Cooley-Tukey FFT.
* 2.1. Main loop - Stages: iterate log2(N) times.
* iterate over the stages of the FFT computation (log2(N) stages);
* each stage processes sub-problems of increasing size,
* combining smaller DFT results into larger ones.
*/
for (size_t s = 1; s <= log2N; ++s) {
/**
* Stage size value.
* the size of the sub-problems in the current stage of the FFT;
* we use the bitwise shift operator (<<) to compute the power of 2,
* which is a faster operation than pow(2, s); for example, 1 << 3 = 8, which is 2^3;
* the size of the subproblems is 2^s, and it is pretty clear that the subproblems are doubled at each stage;
* this is an interesting observation because it is a divide-and-conquer algorithm (Cooley-Tukey);
*/
const size_t m = 1 << s;
/**
* Midpoint value.
* half the size of the sub-problems in the current stage of the FFT;
* it is calculated using the bitwise shift operator (>>) to divide the size by 2,
* another faster operation than m / 2; for example, 8 >> 1 = 4, which is 8 / 2;
* it is used to determine the midpoint of the current sub-problems;
* since the Cooley-Tukey algorithm combines elements from the first and second halves
* of the subproblems during the butterfly calculation (see below),
* knowing the midpoint (m2) helps to pair these elements correctly and
* apply the twist factor correctly;
*/
const size_t m2 = m >> 1;
/**
* Stage Twiddle factor.
* precompute the twiddle factor for the current stage
* (the angle is divided by the current stage size)
* since the twiddle factor is a complex exponential,
* we can represent it in polar form (magnitude and angle);
* the magnitude is 1.0 (unit circle), and the angle is the angle value divided by the current stage size;
* before entering the inner loop, the twiddle factor is initialized to 1.0 because there is no rotation yet.
* see the w *= wm operation in the inner loop for the usage of the twiddle factor
*/
const std::complex<double> wm = std::polar(1.0, angle / static_cast<double>(m));
/**
* 2.2. Inner loop - Iterating over the 'sub-DFTs' in the current stage.
* the 'sub-DFTs' are computed in place and not recursively!
* the iteration is over the sub-DFTs in the current stage, processing each sub-DFT independently;
* the boundary of the iteration is the size of the input vector (N), so processing all the elements;
* the iteration step is the size of the current stage (m), i.e. jumping over the sub-DFTs;
* note that the step size m represents the current size of the sub-DFTs being processed in this stage;
*/
for (size_t k = 0; k < N; k += m) {
/**
* Incremental Twiddle factor.
* it is initialized to 1.0 (magnitude) and a phase angle of 0 (no rotation);
* it is used within the inner loop to apply the current twiddle factor
* to the elements being processed;
* it starts with no rotation, and it is updated at each iteration of the inner loop
* to apply the rotation (phase shift);
*/
std::complex<double> w{1.0, 0};
/**
* 2.3. Butterfly computation.
* the butterfly computation is the core of the Cooley-Tukey algorithm;
* at each iteration, the algorithm performs a butterfly operation on a pair of elements within the sub-DFT;
* this loop handles the combination of elements with the appropriate twiddle factor;
* it continues until the midpoint of the sub-DFT is reached:
* - it avoids processing the same pair of elements twice (redundancy);
* - it processes the first half of the sub-DFT and combines it with the second half;
*/
for (size_t j = 0; j < m2; ++j) {
/**
* Phase Adjustment.
* it represents the phase adjustment element from the second half of the current chunk;
* it is calculated by multiplying the incremental twiddle factor with the element from the second half;
* - for a chunk starting at index k, the first half spans indeces [k, k + m2 - 1];
* - therefore, the second half spans indeces [k + m2, k + m - 1];
* using m2, to access an element in the second half of the chunk, we use the index k + j + m2;
* it is called phase adjustment because it refers to changing the phase of an element
* by multiplying it by the twiddle factor
* (it increases the mapping from data to frequency domain);
*/
std::complex<double> t = w * input[k + j + m2];
/**
* First Half element.
*/
std::complex<double> u = input[k + j];
/**
* Butterfly Operation.
* computation of the first half (k+j) and second half (k+j+m2) of the butterfly operation;
* the operations ensure that the elements are combined correctly and that the entire
* frequency spectrum of the input signal is captured;
*/
input[k + j] = u + t;
input[k + j + m2] = u - t;
/**
* Twiddle Factor Update.
* the twiddle factor is applied during each stage of the FFT computation;
*/
w *= wm;
}
}
}
// save time after the computation is done (performance measurement)
const std::chrono::time_point<std::chrono::system_clock> end_time = std::chrono::high_resolution_clock::now();
// save the solution to the output vector
solution = input;
printf("Sequential 1D Fast Fourier Transform computed. Elapsed time: %f ms.\n",
std::chrono::duration<float, std::milli>(end_time - start_time).count());
}