diff --git a/Cargo.toml b/Cargo.toml
index b64fb50..326d114 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,7 @@ complex-nums = ["dep:num-complex", "dep:bytemuck"]
 [dev-dependencies]
 criterion = "0.5.1"
 fftw = "0.8.0"
+realfft = "3.3.0"
 rand = "0.8.5"
 utilities = { path = "utilities" }
 
diff --git a/assets/lines.png b/assets/lines.png
new file mode 100644
index 0000000..6767aa6
Binary files /dev/null and b/assets/lines.png differ
diff --git a/benches/README.md b/benches/README.md
index e276e06..058ba1f 100644
--- a/benches/README.md
+++ b/benches/README.md
@@ -78,6 +78,30 @@ The generated images will be saved in your working directory.
 | Active Toolchain          | nightly-x86_64-unknown-linux-gnu (default)                                                      |
 | Rustc Version             | rustc 1.79.0-nightly (7f2fc33da 2024-04-22)                                                     |
 
+## Measuruing throughput with `criterion`
+
+0. Install gnuplot if you want `criterion` to use gnuplot as the plotting backend.
+   On macOS, just run:
+```bash
+brew install gnuplot 
+```
+
+1. Run benchmarks
+```bash
+cargo bench
+```
+
+2. Open the report using a browser. The report will be located in the `target` directory. 
+   For example, from the root of the `PhastFT` repository (on macOS) run:
+```bash
+open -a firefox target/criterion/r2c_versus_c2c/report/index.html
+```
+
+### Results
+
+![Alt text](../assets/lines.png)
+
+
 ## Profiling
 
 Navigate to the cloned repo:
diff --git a/benches/bench.rs b/benches/bench.rs
index 00d7833..9a49f74 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -1,6 +1,11 @@
 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use realfft::num_complex::Complex;
+use realfft::RealFftPlanner;
+use utilities::gen_random_signal;
+
 use num_traits::Float;
 use phastft::{
+    fft::r2c_fft_f64,
     fft_32_with_opts_and_plan, fft_64_with_opts_and_plan,
     options::Options,
     planner::{Direction, Planner32, Planner64},
@@ -9,9 +14,67 @@ use rand::{
     distributions::{Distribution, Standard},
     thread_rng, Rng,
 };
-use utilities::rustfft::num_complex::Complex;
 use utilities::rustfft::FftPlanner;
 
+fn benchmark_r2c_vs_c2c(c: &mut Criterion) {
+    let sizes = vec![1 << 10, 1 << 12, 1 << 14, 1 << 16, 1 << 18, 1 << 20];
+
+    let mut group = c.benchmark_group("r2c_versus_c2c");
+    for &size in &sizes {
+        group.throughput(Throughput::Elements(size as u64));
+
+        group.bench_with_input(BenchmarkId::new("r2c_fft", size), &size, |b, &size| {
+            let mut s_re = vec![0.0; size];
+            let mut s_im = vec![0.0; size];
+            gen_random_signal(&mut s_re, &mut s_im);
+            let mut output_re = vec![0.0; size];
+            let mut output_im = vec![0.0; size];
+
+            b.iter(|| {
+                r2c_fft_f64(
+                    black_box(&s_re),
+                    black_box(&mut output_re),
+                    black_box(&mut output_im),
+                );
+            });
+        });
+
+        group.bench_with_input(BenchmarkId::new("c2c_fft", size), &size, |b, &size| {
+            let mut s_re = vec![0.0; size];
+            let mut s_im = vec![0.0; size];
+            gen_random_signal(&mut s_re, &mut s_im);
+            s_im = vec![0.0; size];
+
+            let options = Options::guess_options(s_re.len());
+            let planner = Planner64::new(s_re.len(), Direction::Reverse);
+
+            b.iter(|| {
+                fft_64_with_opts_and_plan(
+                    black_box(&mut s_re),
+                    black_box(&mut s_im),
+                    black_box(&options),
+                    black_box(&planner),
+                );
+            });
+        });
+
+        group.bench_with_input(BenchmarkId::new("real_fft", size), &size, |b, &size| {
+            let mut s_re = vec![0.0; size];
+            let mut s_im = vec![0.0; size];
+            gen_random_signal(&mut s_re, &mut s_im);
+            let mut output = vec![Complex::default(); s_re.len() / 2 + 1];
+            let mut planner = RealFftPlanner::<f64>::new();
+
+            b.iter(|| {
+                planner
+                    .plan_fft_forward(s_re.len())
+                    .process(&mut s_re, &mut output)
+                    .expect("fft.process() failed!");
+            });
+        });
+    }
+}
+
 const LENGTHS: &[usize] = &[
     6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
 ];
@@ -170,11 +233,13 @@ fn benchmark_inverse_f64(c: &mut Criterion) {
     }
 }
 
-criterion_group!(
-    benches,
-    benchmark_forward_f32,
-    benchmark_inverse_f32,
-    benchmark_forward_f64,
-    benchmark_inverse_f64
-);
+fn critertion_benchmark(c: &mut Criterion) {
+    benchmark_forward_f32(c);
+    benchmark_inverse_f32(c);
+    benchmark_forward_f64(c);
+    benchmark_inverse_f64(c);
+    benchmark_r2c_vs_c2c(c);
+}
+
+criterion_group!(benches, critertion_benchmark,);
 criterion_main!(benches);
diff --git a/examples/profile.rs b/examples/profile.rs
index dd31597..910b20e 100644
--- a/examples/profile.rs
+++ b/examples/profile.rs
@@ -1,22 +1,31 @@
 use std::env;
 use std::str::FromStr;
 
-use utilities::gen_random_signal;
-
-use phastft::fft_64_with_opts_and_plan;
-use phastft::options::Options;
-use phastft::planner::{Direction, Planner64};
-
-fn benchmark_fft_64(n: usize) {
+use phastft::fft::r2c_fft_f64;
+// use phastft::fft_64;
+// use phastft::fft_64_with_opts_and_plan;
+// use phastft::options::Options;
+// use phastft::planner::{Direction, Planner64};
+// use utilities::gen_random_signal;
+
+// fn benchmark_fft_64(n: usize) {
+//     let big_n = 1 << n;
+//     let mut reals = vec![0.0; big_n];
+//     let mut imags = vec![0.0; big_n];
+//     gen_random_signal(&mut reals, &mut imags);
+//
+//     let planner = Planner64::new(reals.len(), Direction::Forward);
+//     let opts = Options::guess_options(reals.len());
+//
+//     fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
+// }
+
+fn benchmark_r2c_fft(n: usize) {
     let big_n = 1 << n;
-    let mut reals = vec![0.0; big_n];
-    let mut imags = vec![0.0; big_n];
-    gen_random_signal(&mut reals, &mut imags);
-
-    let planner = Planner64::new(reals.len(), Direction::Forward);
-    let opts = Options::guess_options(reals.len());
-
-    fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
+    let reals: Vec<f64> = (1..=big_n).map(|i| i as f64).collect();
+    let mut output_re = vec![0.0; big_n];
+    let mut output_im = vec![0.0; big_n];
+    r2c_fft_f64(&reals, &mut output_re, &mut output_im);
 }
 
 fn main() {
@@ -24,6 +33,6 @@ fn main() {
     assert_eq!(args.len(), 2, "Usage {} <n>", args[0]);
 
     let n = usize::from_str(&args[1]).unwrap();
-
-    benchmark_fft_64(n);
+    benchmark_r2c_fft(n);
+    // benchmark_fft_64(n);
 }
diff --git a/pyphastft/audio_visual.py b/pyphastft/audio_visual.py
new file mode 100644
index 0000000..2396ebf
--- /dev/null
+++ b/pyphastft/audio_visual.py
@@ -0,0 +1,111 @@
+import sys
+import numpy as np
+import pyaudio
+import pyqtgraph as pg
+from pyphastft import rfft
+from pyqtgraph.Qt import QtWidgets, QtCore
+
+class RealTimeAudioSpectrum(QtWidgets.QWidget):
+	def __init__(self, parent=None):
+		super(RealTimeAudioSpectrum, self).__init__(parent)
+		self.n_fft_bins = 1024
+		self.n_display_bins = 64
+		self.sample_rate = 44100
+		self.smoothing_factor = 0.1  # Fine-tuned smoothing factor
+		self.ema_fft_data = np.zeros(self.n_display_bins)
+		self.init_ui()
+		self.init_audio_stream()
+
+	def init_ui(self):
+		self.layout = QtWidgets.QVBoxLayout(self)
+		self.plot_widget = pg.PlotWidget()
+		self.layout.addWidget(self.plot_widget)
+
+		self.plot_widget.setBackground("k")
+		self.plot_item = self.plot_widget.getPlotItem()
+		self.plot_item.setTitle(
+				"Real-Time Audio Spectrum Visualizer powered by PhastFT",
+				color="w",
+				size="16pt",
+				)
+
+		self.plot_item.getAxis("left").hide()
+		self.plot_item.getAxis("bottom").hide()
+
+		self.plot_item.setXRange(0, self.sample_rate / 2, padding=0)
+		self.plot_item.setYRange(0, 1, padding=0)
+
+		self.bar_width = (self.sample_rate / 2) / self.n_display_bins * 0.8
+		self.freqs = np.linspace(
+				0, self.sample_rate / 2, self.n_display_bins, endpoint=False
+				)
+
+		self.bar_graph = pg.BarGraphItem(
+				x=self.freqs,
+				height=np.zeros(self.n_display_bins),
+				width=self.bar_width,
+				brush=pg.mkBrush("m"),
+				)
+		self.plot_item.addItem(self.bar_graph)
+
+		self.timer = QtCore.QTimer()
+		self.timer.timeout.connect(self.update)
+		self.timer.start(50)
+
+	def init_audio_stream(self):
+		self.p = pyaudio.PyAudio()
+		self.stream = self.p.open(
+				format=pyaudio.paFloat32,
+				channels=1,
+				rate=self.sample_rate,
+				input=True,
+				frames_per_buffer=self.n_fft_bins,
+				stream_callback=self.audio_callback,
+				)
+		self.stream.start_stream()
+
+	def audio_callback(self, in_data, frame_count, time_info, status):
+		audio_data = np.frombuffer(in_data, dtype=np.float32)
+		audio_data = np.ascontiguousarray(audio_data, dtype=np.float64)
+		reals, imags = rfft(audio_data, direction="f")
+		reals = np.ascontiguousarray(reals)
+		imags = np.ascontiguousarray(imags)
+		fft_magnitude = np.sqrt(reals**2 + imags**2)[: self.n_fft_bins // 2]
+
+		new_fft_data = np.interp(
+				np.linspace(0, len(fft_magnitude), self.n_display_bins),
+				np.arange(len(fft_magnitude)),
+				fft_magnitude,
+				)
+
+		new_fft_data = np.log1p(new_fft_data)  # Apply logarithmic scaling
+
+		self.ema_fft_data = self.ema_fft_data * self.smoothing_factor + new_fft_data * (
+				1.0 - self.smoothing_factor
+				)
+
+		return in_data, pyaudio.paContinue
+
+	def update(self):
+		# Normalize the FFT data to ensure it fits within the display range
+		max_value = np.max(self.ema_fft_data)
+		if max_value > 0:
+			normalized_fft_data = self.ema_fft_data / max_value
+		else:
+			normalized_fft_data = self.ema_fft_data
+
+		self.bar_graph.setOpts(height=normalized_fft_data, width=self.bar_width)
+
+	def closeEvent(self, event):
+		self.stream.stop_stream()
+		self.stream.close()
+		self.p.terminate()
+		event.accept()
+
+if __name__ == "__main__":
+	app = QtWidgets.QApplication(sys.argv)
+	window = RealTimeAudioSpectrum()
+	window.show()
+	sys.exit(app.exec_())
+
+
diff --git a/pyphastft/requirements.txt b/pyphastft/requirements.txt
new file mode 100644
index 0000000..122615f
--- /dev/null
+++ b/pyphastft/requirements.txt
@@ -0,0 +1,7 @@
+maturin==1.6.0
+pip==24.0
+PyAudio==0.2.14
+pylint==3.2.3
+pyphastft==0.2.1
+PyQt5==5.15.10
+pyqtgraph==0.13.7
diff --git a/pyphastft/src/lib.rs b/pyphastft/src/lib.rs
index f69b64f..a2d805e 100644
--- a/pyphastft/src/lib.rs
+++ b/pyphastft/src/lib.rs
@@ -1,5 +1,5 @@
-use numpy::PyReadwriteArray1;
-use phastft::{fft_64 as fft_64_rs, planner::Direction};
+use numpy::{PyReadonlyArray1, PyReadwriteArray1};
+use phastft::{fft::r2c_fft_f64, fft_64 as fft_64_rs, planner::Direction};
 use pyo3::prelude::*;
 
 #[pyfunction]
@@ -18,9 +18,27 @@ fn fft(mut reals: PyReadwriteArray1<f64>, mut imags: PyReadwriteArray1<f64>, dir
     );
 }
 
+#[pyfunction]
+fn rfft(reals: PyReadonlyArray1<f64>, direction: char) -> (Vec<f64>, Vec<f64>) {
+    assert!(direction == 'f' || direction == 'r');
+    let _dir = if direction == 'f' {
+        Direction::Forward
+    } else {
+        Direction::Reverse
+    };
+
+    let big_n = reals.as_slice().unwrap().len();
+
+    let mut output_re = vec![0.0; big_n];
+    let mut output_im = vec![0.0; big_n];
+    r2c_fft_f64(reals.as_slice().unwrap(), &mut output_re, &mut output_im);
+    (output_re, output_im)
+}
+
 /// A Python module implemented in Rust.
 #[pymodule]
 fn pyphastft(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(fft, m)?)?;
+    m.add_function(wrap_pyfunction!(rfft, m)?)?;
     Ok(())
 }
diff --git a/pyphastft/vis_qt.py b/pyphastft/vis_qt.py
deleted file mode 100644
index 2a5c5e7..0000000
--- a/pyphastft/vis_qt.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import sys
-
-import numpy as np
-import pyaudio
-import pyqtgraph as pg
-from pyphastft import fft
-from pyqtgraph.Qt import QtWidgets, QtCore
-
-
-class RealTimeAudioSpectrum(QtWidgets.QWidget):
-    def __init__(self, parent=None):
-        super(RealTimeAudioSpectrum, self).__init__(parent)
-        self.n_fft_bins = 1024  # Increased FFT size for better frequency resolution
-        self.n_display_bins = 32  # Maintain the same number of bars in the display
-        self.sample_rate = 44100
-        self.smoothing_factor = 0.1  # Smaller value for more smoothing
-        self.ema_fft_data = np.zeros(
-            self.n_display_bins
-        )  # Adjusted to the number of display bins
-        self.init_ui()
-        self.init_audio_stream()
-
-    def init_ui(self):
-        self.layout = QtWidgets.QVBoxLayout(self)
-        self.plot_widget = pg.PlotWidget()
-        self.layout.addWidget(self.plot_widget)
-
-        # Customize plot aesthetics
-        self.plot_widget.setBackground("k")
-        self.plot_item = self.plot_widget.getPlotItem()
-        self.plot_item.setTitle(
-            "Real-Time Audio Spectrum Visualizer powered by PhastFT",
-            color="w",
-            size="16pt",
-        )
-
-        # Hide axis labels
-        self.plot_item.getAxis("left").hide()
-        self.plot_item.getAxis("bottom").hide()
-
-        # Set fixed ranges for the x and y axes to prevent them from jumping
-        self.plot_item.setXRange(0, self.sample_rate / 2, padding=0)
-        self.plot_item.setYRange(0, 1, padding=0)
-
-        self.bar_width = (
-            (self.sample_rate / 2) / self.n_display_bins * 0.90
-        )  # Adjusted width for display bins
-
-        # Calculate bar positions so they are centered with respect to their frequency values
-        self.freqs = np.linspace(
-            0 + self.bar_width / 2,
-            self.sample_rate / 2 - self.bar_width / 2,
-            self.n_display_bins,
-        )
-
-        self.bar_graph = pg.BarGraphItem(
-            x=self.freqs,
-            height=np.zeros(self.n_display_bins),
-            width=self.bar_width,
-            brush=pg.mkBrush("m"),
-        )
-        self.plot_item.addItem(self.bar_graph)
-
-        self.timer = QtCore.QTimer()
-        self.timer.timeout.connect(self.update)
-        self.timer.start(50)  # Update interval in milliseconds
-
-    def init_audio_stream(self):
-        self.p = pyaudio.PyAudio()
-        self.stream = self.p.open(
-            format=pyaudio.paFloat32,
-            channels=1,
-            rate=self.sample_rate,
-            input=True,
-            frames_per_buffer=self.n_fft_bins,  # This should match the FFT size
-            stream_callback=self.audio_callback,
-        )
-        self.stream.start_stream()
-
-    def audio_callback(self, in_data, frame_count, time_info, status):
-        audio_data = np.frombuffer(in_data, dtype=np.float32)
-        reals = np.zeros(self.n_fft_bins)
-        imags = np.zeros(self.n_fft_bins)
-        reals[: len(audio_data)] = audio_data  # Fill the reals array with audio data
-        fft(reals, imags, direction="f")
-        fft_magnitude = np.sqrt(reals**2 + imags**2)[: self.n_fft_bins // 2]
-
-        # Aggregate or interpolate FFT data to fit into display bins
-        new_fft_data = np.interp(
-            np.linspace(0, len(fft_magnitude), self.n_display_bins),
-            np.arange(len(fft_magnitude)),
-            fft_magnitude,
-        )
-
-        # Apply exponential moving average filter
-        self.ema_fft_data = self.ema_fft_data * self.smoothing_factor + new_fft_data * (
-            1.0 - self.smoothing_factor
-        )
-        return in_data, pyaudio.paContinue
-
-    def update(self):
-        self.bar_graph.setOpts(height=self.ema_fft_data, width=self.bar_width)
-
-    def closeEvent(self, event):
-        self.stream.stop_stream()
-        self.stream.close()
-        self.p.terminate()
-        event.accept()
-
-
-if __name__ == "__main__":
-    app = QtWidgets.QApplication(sys.argv)
-    window = RealTimeAudioSpectrum()
-    window.show()
-    sys.exit(app.exec_())
diff --git a/src/fft.rs b/src/fft.rs
new file mode 100644
index 0000000..13606bd
--- /dev/null
+++ b/src/fft.rs
@@ -0,0 +1,313 @@
+//! Implementation of Real valued FFT
+use std::simd::prelude::f64x8;
+
+use crate::planner::{Planner32, Planner64};
+use crate::{
+    fft_32_with_opts_and_plan, fft_64, fft_64_with_opts_and_plan,
+    twiddles::{generate_twiddles, Twiddles},
+    utils::deinterleave,
+    Direction, Options,
+};
+
+macro_rules! impl_r2c_fft {
+    ($func_name:ident, $precision:ty, $planner:ident, $fft_w_opts_and_plan:ident) => {
+        /// Performs a Real-Valued Fast Fourier Transform (FFT)
+        ///
+        /// This function computes the FFT of a real-valued input signal and produces
+        /// complex-valued output. The implementation follows the principles of splitting
+        /// the input into even and odd components and then performing the FFT on these
+        /// components.
+        ///
+        /// # Arguments
+        ///
+        /// `input_re` - A slice containing the real-valued input signal.
+        ///
+        /// `output_re` - A mutable slice to store the real parts of the FFT output.
+        ///
+        /// `output_im` - A mutable slice to store the imaginary parts of the FFT output.
+        ///
+        /// # Panics
+        ///
+        /// Panics if `output_re.len() != output_im.len()` and `input_re.len()` == `output_re.len()`
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// use phastft::fft::{r2c_fft_f32, r2c_fft_f64};
+        ///
+        /// let big_n = 16;
+        /// let input: Vec<f64> = (1..=big_n).map(|x| x as f64).collect();
+        /// let mut output_re = vec![0.0; big_n];
+        /// let mut output_im = vec![0.0; big_n];
+        /// r2c_fft_f64(&input, &mut output_re, &mut output_im);
+        ///
+        /// let input: Vec<f32> = (1..=big_n).map(|x| x as f32).collect();
+        /// let mut output_re: Vec<f32> = vec![0.0; big_n];
+        /// let mut output_im: Vec<f32> = vec![0.0; big_n];
+        /// r2c_fft_f32(&input, &mut output_re, &mut output_im);
+        /// ```
+        /// # References
+        ///
+        /// This implementation is based on the concepts discussed in
+        /// [Levente Kovács' post](https://kovleventer.com/blog/fft_real/).
+        pub fn $func_name(
+            input_re: &[$precision],
+            output_re: &mut [$precision],
+            output_im: &mut [$precision],
+        ) {
+            assert!(output_re.len() == output_im.len() && input_re.len() == output_re.len());
+            let big_n = input_re.len();
+
+            // Split real signal of size `N` into odd (size `N/2`) and even (size `N/2`) via deinterleave
+            // The even part is now the "real" components and the odd part is now the "imaginary" components
+            let (mut z_even, mut z_odd): (Vec<_>, Vec<_>) = deinterleave(&input_re);
+
+            let stride = big_n / 2;
+            let planner = <$planner>::new(big_n / 2, Direction::Forward);
+
+            // We only need (N / 2) / 2 twiddle factors for the actual FFT call, so we filter
+            // filter_twiddles(&mut planner.twiddles_re, &mut planner.twiddles_im);
+
+            let opts = Options::guess_options(z_even.len());
+            $fft_w_opts_and_plan(&mut z_even, &mut z_odd, &opts, &planner);
+
+            // Z = np.fft.fft(z)
+            let mut z_x_re = vec![0.0; big_n / 2];
+            let mut z_x_im = vec![0.0; big_n / 2];
+            let mut z_y_re = vec![0.0; big_n / 2];
+            let mut z_y_im = vec![0.0; big_n / 2];
+
+            // Zminconj = np.roll(np.flip(Z), 1).conj()
+            // Zx =  0.5  * (Z + Zminconj)
+            // Zy = -0.5j * (Z - Zminconj)
+            z_even[1..]
+                .iter()
+                .zip(z_odd[1..].iter())
+                .zip(z_even[1..].iter().rev())
+                .zip(z_odd[1..].iter().rev())
+                .zip(z_x_re[1..].iter_mut())
+                .zip(z_x_im[1..].iter_mut())
+                .zip(z_y_re[1..].iter_mut())
+                .zip(z_y_im[1..].iter_mut())
+                .for_each(
+                    |(((((((z_e, z_o), z_e_mc), z_o_mc), zx_re), zx_im), zy_re), zy_im)| {
+                        let a = *z_e;
+                        let b = *z_o;
+                        let c = *z_e_mc;
+                        let d = -(*z_o_mc);
+
+                        let t = 0.5 * (a + c);
+                        let u = 0.5 * (b + d);
+                        let v = -0.5 * (a - c);
+                        let w = 0.5 * (b - d);
+
+                        *zx_re = t;
+                        *zx_im = u;
+                        *zy_re = w;
+                        *zy_im = v;
+                    },
+                );
+
+            let a = z_even[0];
+            let b = z_odd[0];
+            let c = z_even[0];
+            let d = -z_odd[0];
+
+            let t = 0.5 * (a + c);
+            let u = 0.5 * (b + d);
+            let v = -0.5 * (a - c);
+            let w = 0.5 * (b - d);
+
+            z_x_re[0] = t;
+            z_x_im[0] = u;
+            z_y_re[0] = w;
+            z_y_im[0] = v;
+
+            // Zall = np.concatenate([Zx + W*Zy, Zx - W*Zy])
+            let (output_re_first_half, output_re_second_half) = output_re.split_at_mut(big_n / 2);
+            let (output_im_first_half, output_im_second_half) = output_im.split_at_mut(big_n / 2);
+            let twiddles_iter = Twiddles::<$precision>::new(stride);
+
+            z_x_re
+                .iter()
+                .zip(z_x_im.iter())
+                .zip(z_y_re.iter())
+                .zip(z_y_im.iter())
+                .zip(output_re_first_half)
+                .zip(output_im_first_half)
+                .zip(output_re_second_half)
+                .zip(output_im_second_half)
+                .zip(twiddles_iter)
+                .for_each(
+                    |(
+                        (
+                            ((((((zx_re, zx_im), zy_re), zy_im), o_re_fh), o_im_fh), o_re_sh),
+                            o_im_sh,
+                        ),
+                        (w_re, w_im),
+                    )| {
+                        let wz_re = w_re * zy_re - w_im * zy_im;
+                        let wz_im = w_re * zy_im + w_im * zy_re;
+
+                        // Zx + W * Zy
+                        *o_re_fh = zx_re + wz_re;
+                        *o_im_fh = zx_im + wz_im;
+
+                        // Zx - W * Zy
+                        *o_re_sh = zx_re - wz_re;
+                        *o_im_sh = zx_im - wz_im;
+                    },
+                );
+        }
+    };
+}
+
+impl_r2c_fft!(r2c_fft_f32, f32, Planner32, fft_32_with_opts_and_plan);
+impl_r2c_fft!(r2c_fft_f64, f64, Planner64, fft_64_with_opts_and_plan);
+
+/// Performs a Real-Valued, Inverse, Fast Fourier Transform (FFT)
+///
+/// # Panics
+///
+/// Panics if `reals.len() != imags.len()` *and* `reals.len() != output.len()`
+///
+pub fn r2c_ifft_f64(reals: &mut [f64], imags: &mut [f64], output: &mut [f64]) {
+    assert!(reals.len() == imags.len() && reals.len() == output.len());
+
+    let big_n = reals.len();
+
+    let mut z_x_re = vec![0.0; big_n / 2];
+    let mut z_x_im = vec![0.0; big_n / 2];
+
+    let (reals_first_half, reals_second_half) = reals.split_at(big_n / 2);
+    let (imags_first_half, imags_second_half) = imags.split_at(big_n / 2);
+
+    // Compute Zx
+    for i in 0..big_n / 2 {
+        let re = 0.5 * (reals_first_half[i] + reals_second_half[i]);
+        let im = 0.5 * (imags_first_half[i] + imags_second_half[i]);
+
+        z_x_re[i] = re;
+        z_x_im[i] = im;
+    }
+
+    let mut z_y_re = vec![0.0; big_n / 2];
+    let mut z_y_im = vec![0.0; big_n / 2];
+    let (twiddles_re, twiddles_im) = generate_twiddles::<f64>(big_n / 2, Direction::Reverse);
+
+    // Compute W * Zy
+    for i in 0..big_n / 2 {
+        let a = reals_first_half[i];
+        let b = imags_first_half[i];
+        let c = reals_second_half[i];
+        let d = imags_second_half[i];
+
+        let e = a - c;
+        let f = b - d;
+        let k = -0.5 * f;
+        let l = 0.5 * e;
+
+        let m = twiddles_re[i];
+        let n = twiddles_im[i];
+
+        z_y_re[i] = k * m - l * n;
+        z_y_im[i] = k * n + l * m;
+    }
+
+    const CHUNK_SIZE: usize = 8;
+
+    // Compute Zx + Zy
+    z_x_re
+        .chunks_exact_mut(CHUNK_SIZE)
+        .zip(z_x_im.chunks_exact_mut(CHUNK_SIZE))
+        .zip(z_y_re.chunks_exact(CHUNK_SIZE))
+        .zip(z_y_im.chunks_exact(CHUNK_SIZE))
+        .for_each(|(((zxr, zxi), zyr), zyi)| {
+            let mut z_x_r = f64x8::from_slice(zxr);
+            let mut z_x_i = f64x8::from_slice(zxi);
+            let z_y_r = f64x8::from_slice(zyr);
+            let z_y_i = f64x8::from_slice(zyi);
+
+            z_x_r += z_y_r;
+            z_x_i += z_y_i;
+            zxr.copy_from_slice(z_x_r.as_array());
+            zxi.copy_from_slice(z_x_i.as_array());
+        });
+
+    fft_64(&mut z_x_re, &mut z_x_im, Direction::Reverse);
+
+    // Store reals in the even indices, and imaginaries in the odd indices
+    output
+        .chunks_exact_mut(2)
+        .zip(z_x_re)
+        .zip(z_x_im)
+        .for_each(|((out, z_r), z_i)| {
+            out[0] = z_r;
+            out[1] = z_i;
+        });
+}
+
+#[cfg(test)]
+mod tests {
+    use utilities::assert_float_closeness;
+
+    use crate::fft_32;
+
+    use super::*;
+
+    macro_rules! impl_r2c_vs_c2c_test {
+        ($func:ident, $precision:ty, $fft_precision:ident, $rfft_func:ident, $epsilon:literal) => {
+            #[test]
+            fn $func() {
+                for n in 4..=11 {
+                    let big_n = 1 << n;
+                    let input_re: Vec<$precision> = (1..=big_n).map(|i| i as $precision).collect(); // Length is 7, which is a prime number
+                    let (mut output_re, mut output_im) = (vec![0.0; big_n], vec![0.0; big_n]);
+
+                    $rfft_func(&input_re, &mut output_re, &mut output_im);
+
+                    let mut input_re: Vec<$precision> =
+                        (1..=big_n).map(|i| i as $precision).collect();
+                    let mut input_im = vec![0.0; input_re.len()]; // Assume the imaginary part is zero for this example
+                    $fft_precision(&mut input_re, &mut input_im, Direction::Forward);
+
+                    output_re
+                        .iter()
+                        .zip(output_im.iter())
+                        .zip(input_re.iter())
+                        .zip(input_im.iter())
+                        .for_each(|(((a_re, a_im), e_re), e_im)| {
+                            assert_float_closeness(*a_re, *e_re, $epsilon);
+                            assert_float_closeness(*a_im, *e_im, $epsilon);
+                        });
+                }
+            }
+        };
+    }
+
+    impl_r2c_vs_c2c_test!(r2c_vs_c2c_f64, f64, fft_64, r2c_fft_f64, 1e-6);
+    impl_r2c_vs_c2c_test!(r2c_vs_c2c_f32, f32, fft_32, r2c_fft_f32, 3.5);
+
+    #[test]
+    fn fw_inv_eq_identity() {
+        let n = 4;
+        let big_n = 1 << n;
+        let expected_signal: Vec<f64> = (1..=big_n).map(|s| s as f64).collect();
+
+        let mut output_re = vec![0.0; big_n];
+        let mut output_im = vec![0.0; big_n];
+        r2c_fft_f64(&expected_signal, &mut output_re, &mut output_im);
+
+        let mut actual_signal = vec![0.0; big_n];
+        r2c_ifft_f64(&mut output_re, &mut output_im, &mut actual_signal);
+
+        for (z_a, z_e) in actual_signal.iter().zip(expected_signal.iter()) {
+            assert_float_closeness(*z_a, *z_e, 1e-10);
+        }
+
+        println!(
+            "expected signal: {:?}\nactual signal: {:?}",
+            expected_signal, actual_signal
+        );
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 09f64a1..5a5ee94 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -25,6 +25,7 @@ use crate::planner::{Direction, Planner32, Planner64};
 use crate::twiddles::filter_twiddles;
 
 pub mod cobra;
+pub mod fft;
 mod kernels;
 pub mod options;
 pub mod planner;
@@ -91,10 +92,10 @@ macro_rules! impl_fft_interleaved_for {
     };
 }
 
-#[doc(cfg(feature = "complex-nums"))]
+// #[doc(cfg(feature = "complex-nums"))]
 #[cfg(feature = "complex-nums")]
 impl_fft_interleaved_for!(fft_32_interleaved, f32, fft_32, deinterleave_complex32);
-#[doc(cfg(feature = "complex-nums"))]
+// #[doc(cfg(feature = "complex-nums"))]
 #[cfg(feature = "complex-nums")]
 impl_fft_interleaved_for!(fft_64_interleaved, f64, fft_64, deinterleave_complex64);
 
@@ -128,7 +129,7 @@ macro_rules! impl_fft_with_opts_and_plan_for {
             opts: &Options,
             planner: &$planner,
         ) {
-            assert!(reals.len() == imags.len() && reals.len().is_power_of_two());
+            assert!(reals.len() == imags.len() && reals.len().is_power_of_two(), "reals.len() and imags.len() must be equal, and both should be a power of 2. Actual lengths - reals: {} imags: {}", reals.len(), imags.len());
             let n: usize = reals.len().ilog2() as usize;
 
             // Use references to avoid unnecessary clones
@@ -137,7 +138,7 @@ macro_rules! impl_fft_with_opts_and_plan_for {
 
             // We shouldn't be able to execute FFT if the # of twiddles isn't equal to the distance
             // between pairs
-            assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2);
+            assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2,"got: {} == {} and {} == {}", twiddles_re.len(), reals.len() / 2, twiddles_im.len(), imags.len() / 2);
 
             match planner.direction {
                 Direction::Reverse => {
diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs
index 0b330db..795d586 100644
--- a/utilities/src/lib.rs
+++ b/utilities/src/lib.rs
@@ -17,7 +17,7 @@ use rustfft::num_traits::Float;
 pub fn assert_float_closeness<T: Float + Display>(actual: T, expected: T, epsilon: T) {
     if (actual - expected).abs() >= epsilon {
         panic!(
-            "Assertion failed: {actual} too far from expected value {expected} (with epsilon {epsilon})",
+            "Assertion failed: actual value {actual} too far from expected value {expected} (with epsilon {epsilon})",
         );
     }
 }