From fb30cc02277cc5fb85a70892d841f92aaa9b629a Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Mon, 17 Jun 2024 23:48:19 -0400
Subject: [PATCH 1/7] wip: use faer Row Col product in arnoldi

---
 readme.md  |  1 +
 src/lib.rs | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/readme.md b/readme.md
index 2c2cfcf..5c725fe 100644
--- a/readme.md
+++ b/readme.md
@@ -2,6 +2,7 @@ About
 =====
 
 [![Crates.io](https://img.shields.io/crates/v/faer_gmres)](https://crates.io/crates/faer_gmres)
+![status](https://github.com/wgurecky/faer-gmres/actions/workflows/rust.yml/badge.svg)
 
 GMRES in rust using [faer](https://github.com/sarah-ek/faer-rs).
 
diff --git a/src/lib.rs b/src/lib.rs
index bc0dea3..aa19398 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,6 +33,8 @@
 //
 use faer::prelude::*;
 use faer::sparse::*;
+use faer::row::{Row, RowMut, RowRef};
+use faer::col::{Col, ColRef, ColMut};
 use faer::mat;
 use num_traits::Float;
 use std::{error::Error, fmt};
@@ -162,6 +164,8 @@ fn arnoldi<'a, T>(
     linalg::matmul::sparse_dense_matmul(
         qv.as_mut(), a.as_ref(), q_col.as_ref(), None, T::from(1.0).unwrap(), faer::get_global_parallelism());
 
+    // let mut qvc: Col<T> = a * q_col.col(0);
+
     // Apply left preconditioner if supplied
     match m {
         Some(m) => m.apply_linop_to_vec(qv.as_mut()),
@@ -171,9 +175,9 @@ fn arnoldi<'a, T>(
     let mut h = Vec::with_capacity(k + 2);
     for i in 0..=k {
         let qci: MatRef<T> = q[i].as_ref();
-        let ht = qv.transpose() * qci;
-        h.push( ht.read(0, 0) );
-        qv = qv - (qci * faer::scale(h[i]));
+        let ht = qv.transpose().row(0) * qci.col(0);
+        h.push( ht );
+        qv = qv - (qci * faer::scale(ht));
     }
 
     h.push(qv.norm_l2());

From 9b5f38b9e2815436ec2ee4ebb32cfbeec0b10324 Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Tue, 18 Jun 2024 00:18:07 -0400
Subject: [PATCH 2/7] remove commented out code

---
 src/lib.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index aa19398..edfc3c0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -164,8 +164,6 @@ fn arnoldi<'a, T>(
     linalg::matmul::sparse_dense_matmul(
         qv.as_mut(), a.as_ref(), q_col.as_ref(), None, T::from(1.0).unwrap(), faer::get_global_parallelism());
 
-    // let mut qvc: Col<T> = a * q_col.col(0);
-
     // Apply left preconditioner if supplied
     match m {
         Some(m) => m.apply_linop_to_vec(qv.as_mut()),
@@ -176,8 +174,8 @@ fn arnoldi<'a, T>(
     for i in 0..=k {
         let qci: MatRef<T> = q[i].as_ref();
         let ht = qv.transpose().row(0) * qci.col(0);
-        h.push( ht );
-        qv = qv - (qci * faer::scale(ht));
+        h.push(ht);
+        qv = qv - (qci * faer::scale(h[i]));
     }
 
     h.push(qv.norm_l2());

From 561ed6d431f696b615f7517dc6bb56100090ee1c Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Wed, 19 Jun 2024 21:14:44 -0400
Subject: [PATCH 3/7] remove unused imports

---
 src/lib.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index edfc3c0..7d5cda8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,8 +33,6 @@
 //
 use faer::prelude::*;
 use faer::sparse::*;
-use faer::row::{Row, RowMut, RowRef};
-use faer::col::{Col, ColRef, ColMut};
 use faer::mat;
 use num_traits::Float;
 use std::{error::Error, fmt};

From 1fc49e0dcf0c83cf27dd7dc9e5f1760de1a3df05 Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Wed, 7 Aug 2024 21:10:25 -0400
Subject: [PATCH 4/7] do not allocate new solution vec

---
 Cargo.toml       |   1 +
 examples/ex_1.rs |   6 +-
 src/lib.rs       | 146 +++++++++++++++++++++++------------------------
 3 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 0bf668a..04f25cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,3 +18,4 @@ path = 'src/lib.rs'
 assert_approx_eq = "1.1.0"
 num-traits = "0.2.18"
 faer = {version = "0.19.0"}
+reborrow = "*"
diff --git a/examples/ex_1.rs b/examples/ex_1.rs
index 18fde0f..1c9ec6a 100644
--- a/examples/ex_1.rs
+++ b/examples/ex_1.rs
@@ -44,12 +44,12 @@ fn main() {
     }
 
     // init guess
-    let x0 = faer::Mat::zeros(216, 1);
+    let mut x0 = faer::Mat::zeros(216, 1);
 
     // solve the system
     let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
-    let (res_x, err, iters) = gmres(a_test.as_ref(), rhs.as_ref(), x0.as_ref(), 500, 1e-8, Some(&jacobi_pre)).unwrap();
-    println!("Result x: {:?}", res_x);
+    let (err, iters) = gmres(a_test.as_ref(), rhs.as_ref(), x0.as_mut(), 500, 1e-8, Some(&jacobi_pre)).unwrap();
+    println!("Result x: {:?}", x0);
     println!("Error x: {:?}", err);
     println!("Iters : {:?}", iters);
 
diff --git a/src/lib.rs b/src/lib.rs
index 7d5cda8..110af7a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,8 @@
 use faer::prelude::*;
 use faer::sparse::*;
 use faer::mat;
+use reborrow::*;
+use faer::utils::simd::Write;
 use num_traits::Float;
 use std::{error::Error, fmt};
 
@@ -42,7 +44,6 @@ pub struct GmresError<T>
     where
     T: faer::RealField + Float
 {
-    cur_x: Mat<T>,
     error: T,
     tol: T,
     msg: String,
@@ -169,7 +170,7 @@ fn arnoldi<'a, T>(
     }
 
     let mut h = Vec::with_capacity(k + 2);
-    for i in 0..=k {
+    for i in 0..k+1 {
         let qci: MatRef<T> = q[i].as_ref();
         let ht = qv.transpose().row(0) * qci.col(0);
         h.push(ht);
@@ -186,11 +187,11 @@ fn arnoldi<'a, T>(
 pub fn gmres<'a, T>(
     a: SparseColMatRef<'a, usize, T>,
     b: MatRef<T>,
-    x: MatRef<T>,
+    mut x: MatMut<T>,
     max_iter: usize,
     threshold: T,
     m: Option<&dyn LinOp<T>>
-) -> Result<(Mat<T>, T, usize), GmresError<T>>
+) -> Result<(T, usize), GmresError<T>>
     where
     T: faer::RealField + Float
 {
@@ -239,19 +240,13 @@ pub fn gmres<'a, T>(
         }
     }
 
-    // build full sparse H matrix from column vecs
-    // create sparse matrix from triplets
-    let mut h_triplets = Vec::new();
-    let mut h_len = 0;
+    // build full H matrix from column vecs
+    let mut h_dens: Mat<T> = faer::Mat::zeros(hs.last().unwrap().len()-1, hs.len());
     for (c, hvec) in (&hs).into_iter().enumerate() {
-        h_len = hvec.len();
-        for h_i in 0..h_len {
-            h_triplets.push((h_i, c, hvec[h_i]));
+        for h_i in 0..hvec.len()-1 {
+            h_dens.write(h_i, c, hvec[h_i]);
         }
     }
-    let h_sprs = SparseColMat::<usize, T>::try_new_from_triplets(
-        h_len, (&hs).len(), &h_triplets).unwrap();
-
 
     // build full Q matrix
     let mut q_out: Mat<T> = faer::Mat::zeros(qs[0].nrows(), qs.len());
@@ -262,16 +257,15 @@ pub fn gmres<'a, T>(
     }
 
     // compute solution
-    let h_qr = h_sprs.sp_qr().unwrap();
-    let y = h_qr.solve(&beta.get(0..k_iters+1, 0..1));
+    let h_qr2 = h_dens.qr();
+    let y2 = h_qr2.solve(&beta.get(0..k_iters, 0..1));
 
-    let sol = x.as_ref() + q_out * y;
+    x += q_out.get(0..q_out.nrows(), 0..y2.nrows()) * y2;
     if error <= threshold {
-        Ok((sol, error, k_iters))
+        Ok((error, k_iters))
     } else {
         Err(GmresError{
-            cur_x: sol,
-            error: error,
+            error,
             tol: threshold,
             msg: format!("GMRES did not converge. Error: {:?}. Threshold: {:?}", error, threshold)}
         )
@@ -282,34 +276,32 @@ pub fn gmres<'a, T>(
 pub fn restarted_gmres<'a, T>(
     a: SparseColMatRef<'a, usize, T>,
     b: MatRef<T>,
-    x: MatRef<T>,
+    mut x: MatMut<T>,
     max_iter_inner: usize,
     max_iter_outer: usize,
     threshold: T,
     m: Option<&dyn LinOp<T>>
-) -> Result<(Mat<T>, T, usize), GmresError<T>>
+) -> Result<(T, usize), GmresError<T>>
     where
     T: faer::RealField + Float
 {
-    let mut res_x = x.to_owned();
     let mut error = T::from(1e20).unwrap();
     let mut tot_iters = 0;
     let mut iters = 0;
     for _ko in 0..max_iter_outer {
         let res = gmres(
-            a.as_ref(), b.as_ref(), res_x.as_ref(),
+            a.as_ref(), b.as_ref(), x.rb_mut(),
             max_iter_inner, threshold, m);
         match res {
             // done
             Ok(res) => {
-                (res_x, error, iters) = res;
+                (error, iters) = res;
                 tot_iters += iters;
                 break;
             }
             // failed to converge move to next outer iter
             // store current solution for next outer iter
             Err(res) => {
-                res_x = res.cur_x;
                 error = res.error;
                 tot_iters += max_iter_inner;
             }
@@ -319,11 +311,10 @@ pub fn restarted_gmres<'a, T>(
         }
     }
     if error <= threshold {
-        Ok((res_x, error, tot_iters))
+        Ok((error, tot_iters))
     } else {
         Err(GmresError{
-            cur_x: res_x,
-            error: error,
+            error,
             tol: threshold,
             msg: format!("GMRES did not converge. Error: {:?}. Threshold: {:?}", error, threshold)}
         )
@@ -357,23 +348,23 @@ mod test_faer_gmres {
             ];
 
         // initia sol guess
-        let x0 = faer::mat![
+        let mut x0 = faer::mat![
             [0.0],
             [0.0],
             [0.0],
             ];
 
-        let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 10, 1e-8, None).unwrap();
-        println!("Result x: {:?}", res_x);
+        let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_mut(), 10, 1e-8, None).unwrap();
+        println!("Result x: {:?}", x0.as_ref());
         println!("Error x: {:?}", err);
         println!("Iters : {:?}", iters);
         assert!(err < 1e-4);
         assert!(iters < 10);
 
         // expect result for x to be [2,1,2/3]
-        assert_approx_eq!(res_x.read(0, 0), 2.0, 1e-12);
-        assert_approx_eq!(res_x.read(1, 0), 1.0, 1e-12);
-        assert_approx_eq!(res_x.read(2, 0), 2.0/3.0, 1e-12);
+        assert_approx_eq!(x0.read(0, 0), 2.0, 1e-12);
+        assert_approx_eq!(x0.read(1, 0), 1.0, 1e-12);
+        assert_approx_eq!(x0.read(2, 0), 2.0/3.0, 1e-12);
     }
 
     #[test]
@@ -395,7 +386,7 @@ mod test_faer_gmres {
             ];
 
         // initia sol guess
-        let x0 = faer::mat![
+        let mut x0 = faer::mat![
             [0.0],
             [0.0],
             [0.0],
@@ -404,18 +395,18 @@ mod test_faer_gmres {
         // preconditioner
         let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
 
-        let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 10, 1e-8, 
+        let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_mut(), 10, 1e-8,
                                         Some(&jacobi_pre)).unwrap();
-        println!("Result x: {:?}", res_x);
+        println!("Result x: {:?}", x0.as_ref());
         println!("Error x: {:?}", err);
         println!("Iters : {:?}", iters);
         assert!(err < 1e-4);
         assert!(iters < 10);
 
         // expect result for x to be [2,1,2/3]
-        assert_approx_eq!(res_x.read(0, 0), 2.0, 1e-12);
-        assert_approx_eq!(res_x.read(1, 0), 1.0, 1e-12);
-        assert_approx_eq!(res_x.read(2, 0), 2.0/3.0, 1e-12);
+        assert_approx_eq!(x0.read(0, 0), 2.0, 1e-12);
+        assert_approx_eq!(x0.read(1, 0), 1.0, 1e-12);
+        assert_approx_eq!(x0.read(2, 0), 2.0/3.0, 1e-12);
     }
 
     #[test]
@@ -448,7 +439,7 @@ mod test_faer_gmres {
             ];
 
         // initia sol guess
-        let x0 = faer::mat![
+        let mut x0 = faer::mat![
             [0.0],
             [0.0],
             [0.0],
@@ -456,19 +447,19 @@ mod test_faer_gmres {
             [0.0],
             ];
 
-        let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 100, 1e-6, None).unwrap();
-        println!("Result x: {:?}", res_x);
+        let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_mut(), 100, 1e-6, None).unwrap();
+        println!("Result x: {:?}", x0.as_ref());
         println!("Error x: {:?}", err);
         println!("Iters : {:?}", iters);
         assert!(err < 1e-4);
         assert!(iters < 100);
 
         // expect result for x to be [0.037919, 0.888551, -0.657575, -0.181680, 0.292447]
-        assert_approx_eq!(res_x.read(0, 0), 0.037919, 1e-4);
-        assert_approx_eq!(res_x.read(1, 0), 0.888551, 1e-4);
-        assert_approx_eq!(res_x.read(2, 0), -0.657575, 1e-4);
-        assert_approx_eq!(res_x.read(3, 0), -0.181680, 1e-4);
-        assert_approx_eq!(res_x.read(4, 0), 0.292447, 1e-4);
+        assert_approx_eq!(x0.read(0, 0), 0.037919, 1e-4);
+        assert_approx_eq!(x0.read(1, 0), 0.888551, 1e-4);
+        assert_approx_eq!(x0.read(2, 0), -0.657575, 1e-4);
+        assert_approx_eq!(x0.read(3, 0), -0.181680, 1e-4);
+        assert_approx_eq!(x0.read(4, 0), 0.292447, 1e-4);
     }
 
     #[test]
@@ -501,7 +492,7 @@ mod test_faer_gmres {
             ];
 
         // initia sol guess
-        let x0: Mat<f32> = faer::mat![
+        let mut x0: Mat<f32> = faer::mat![
             [0.0],
             [0.0],
             [0.0],
@@ -509,19 +500,19 @@ mod test_faer_gmres {
             [0.0],
             ];
 
-        let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 100, 1e-6, None).unwrap();
-        println!("Result x: {:?}", res_x);
+        let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_mut(), 100, 1e-6, None).unwrap();
+        println!("Result x: {:?}", x0.as_ref());
         println!("Error x: {:?}", err);
         println!("Iters : {:?}", iters);
         assert!(err < 1e-4);
         assert!(iters < 100);
 
         // expect result for x to be [0.037919, 0.888551, -0.657575, -0.181680, 0.292447]
-        assert_approx_eq!(res_x.read(0, 0), 0.037919, 1e-4);
-        assert_approx_eq!(res_x.read(1, 0), 0.888551, 1e-4);
-        assert_approx_eq!(res_x.read(2, 0), -0.657575, 1e-4);
-        assert_approx_eq!(res_x.read(3, 0), -0.181680, 1e-4);
-        assert_approx_eq!(res_x.read(4, 0), 0.292447, 1e-4);
+        assert_approx_eq!(x0.read(0, 0), 0.037919, 1e-4);
+        assert_approx_eq!(x0.read(1, 0), 0.888551, 1e-4);
+        assert_approx_eq!(x0.read(2, 0), -0.657575, 1e-4);
+        assert_approx_eq!(x0.read(3, 0), -0.181680, 1e-4);
+        assert_approx_eq!(x0.read(4, 0), 0.292447, 1e-4);
     }
 
 
@@ -555,7 +546,7 @@ mod test_faer_gmres {
             ];
 
         // initia sol guess
-        let x0: Mat<f32> = faer::mat![
+        let mut x0: Mat<f32> = faer::mat![
             [0.0],
             [0.0],
             [0.0],
@@ -563,32 +554,41 @@ mod test_faer_gmres {
             [0.0],
             ];
 
-        let (res_x, err, iters) = restarted_gmres(
-            a_test.as_ref(), b.as_ref(), x0.as_ref(), 3, 30,
+        let (err, iters) = restarted_gmres(
+            a_test.as_ref(), b.as_ref(), x0.as_mut(), 3, 30,
             1e-6, None).unwrap();
-        println!("Result x: {:?}", res_x);
+        println!("Result x: {:?}", x0);
         println!("Error x: {:?}", err);
         println!("Iters : {:?}", iters);
         assert!(err < 1e-4);
         assert!(iters < 100);
-        assert_approx_eq!(res_x.read(0, 0), 0.037919, 1e-4);
-        assert_approx_eq!(res_x.read(1, 0), 0.888551, 1e-4);
-        assert_approx_eq!(res_x.read(2, 0), -0.657575, 1e-4);
-        assert_approx_eq!(res_x.read(3, 0), -0.181680, 1e-4);
-        assert_approx_eq!(res_x.read(4, 0), 0.292447, 1e-4);
+        assert_approx_eq!(x0.read(0, 0), 0.037919, 1e-4);
+        assert_approx_eq!(x0.read(1, 0), 0.888551, 1e-4);
+        assert_approx_eq!(x0.read(2, 0), -0.657575, 1e-4);
+        assert_approx_eq!(x0.read(3, 0), -0.181680, 1e-4);
+        assert_approx_eq!(x0.read(4, 0), 0.292447, 1e-4);
+
+        // initia sol guess
+        let mut x0: Mat<f32> = faer::mat![
+            [0.0],
+            [0.0],
+            [0.0],
+            [0.0],
+            [0.0],
+            ];
 
         // with preconditioning
         let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
-        let (res_x_precon, err_precon, iters_precon) = restarted_gmres(
-            a_test.as_ref(), b.as_ref(), x0.as_ref(), 3, 30,
+        let (err_precon, iters_precon) = restarted_gmres(
+            a_test.as_ref(), b.as_ref(), x0.as_mut(), 3, 30,
             1e-6, Some(&jacobi_pre)).unwrap();
         assert!(iters_precon < iters);
         assert!(err_precon < 1e-4);
-        assert_approx_eq!(res_x_precon.read(0, 0), 0.037919, 1e-4);
-        assert_approx_eq!(res_x_precon.read(1, 0), 0.888551, 1e-4);
-        assert_approx_eq!(res_x_precon.read(2, 0), -0.657575, 1e-4);
-        assert_approx_eq!(res_x_precon.read(3, 0), -0.181680, 1e-4);
-        assert_approx_eq!(res_x_precon.read(4, 0), 0.292447, 1e-4);
+        assert_approx_eq!(x0.read(0, 0), 0.037919, 1e-4);
+        assert_approx_eq!(x0.read(1, 0), 0.888551, 1e-4);
+        assert_approx_eq!(x0.read(2, 0), -0.657575, 1e-4);
+        assert_approx_eq!(x0.read(3, 0), -0.181680, 1e-4);
+        assert_approx_eq!(x0.read(4, 0), 0.292447, 1e-4);
     }
 
     #[test]

From 23b3a10c1c7251198cad5a8bbadc8f048752726b Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Wed, 7 Aug 2024 23:31:02 -0400
Subject: [PATCH 5/7] migrate to faer LinOp for precond

---
 Cargo.toml       |  2 +-
 examples/ex_2.rs | 60 ++++++++++++++++++++++++++++++++++
 readme.md        |  2 +-
 src/lib.rs       | 85 +++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 132 insertions(+), 17 deletions(-)
 create mode 100644 examples/ex_2.rs

diff --git a/Cargo.toml b/Cargo.toml
index 04f25cb..448c2be 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,5 +17,5 @@ path = 'src/lib.rs'
 [dependencies]
 assert_approx_eq = "1.1.0"
 num-traits = "0.2.18"
-faer = {version = "0.19.0"}
+faer = {version = "0.19.1", features=["unstable"]}
 reborrow = "*"
diff --git a/examples/ex_2.rs b/examples/ex_2.rs
new file mode 100644
index 0000000..8b0f799
--- /dev/null
+++ b/examples/ex_2.rs
@@ -0,0 +1,60 @@
+use faer_gmres::{gmres, restarted_gmres, JacobiPreconLinOp};
+use faer::prelude::*;
+use faer::sparse::*;
+use std::fs::read_to_string;
+use std::time::Instant;
+
+
+fn main() {
+    let mut a_triplets = vec![];
+    let mut b_rhs = vec![];
+
+    // read A matrix from file
+    for line in read_to_string("./examples/data/e40r0100.txt").unwrap().lines() {
+        let mut tmp_vec_line = vec![];
+        let iter = line.split_whitespace();
+        for word in iter {
+            tmp_vec_line.push(word);
+        }
+        let idx_i = tmp_vec_line[0].parse::<usize>().unwrap()-1;
+        let idx_j = tmp_vec_line[1].parse::<usize>().unwrap()-1;
+        let val = tmp_vec_line[tmp_vec_line.len()-1].parse::<f64>().unwrap();
+        a_triplets.push((idx_i, idx_j, val));
+    }
+
+    // read rhs from file
+    for line in read_to_string("./examples/data/e40r0100_rhs1.txt").unwrap().lines() {
+        let mut tmp_vec_line = vec![];
+        let iter = line.split_whitespace();
+        for word in iter {
+            tmp_vec_line.push(word);
+        }
+        let val = tmp_vec_line[tmp_vec_line.len()-1].parse::<f64>().unwrap();
+        b_rhs.push(val);
+    }
+
+    // create sparse mat
+    let a_test = SparseColMat::<usize, f64>::try_new_from_triplets(
+        17281, 17281,
+        &a_triplets).unwrap();
+
+    // create rhs
+    let mut rhs = faer::Mat::zeros(17281, 1);
+    for (i, rhs_val) in b_rhs.into_iter().enumerate() {
+        rhs.write(i, 0, rhs_val);
+    }
+
+    // init guess
+    let mut x0 = faer::Mat::zeros(17281, 1);
+
+    // solve the system
+    let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
+    let now = Instant::now();
+    let (err, iters) = gmres(a_test.as_ref(), rhs.as_ref(), x0.as_mut(), 5000, 1e-6, Some(&jacobi_pre)).unwrap();
+    let dt = now.elapsed();
+    println!("Result x: {:?}", x0);
+    println!("Error x: {:?}", err);
+    println!("Iters : {:?}", iters);
+    println!("Solve time: {:?}  ", dt);
+
+}
diff --git a/readme.md b/readme.md
index 5c725fe..d6e4d8a 100644
--- a/readme.md
+++ b/readme.md
@@ -53,7 +53,7 @@ Example use:
 A preconditioner can be supplied:
 
     // continued from above...
-    use faer_gmres::{JacobiPreconLinOp, LinOp};
+    use faer_gmres::{JacobiPreconLinOp};
     let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
     let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 10, 1e-8, Some(&jacobi_pre)).unwrap();
 
diff --git a/src/lib.rs b/src/lib.rs
index 110af7a..018c499 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,9 @@
 use faer::prelude::*;
 use faer::sparse::*;
 use faer::mat;
+use faer::linop::LinOp;
+use faer::Parallelism;
+use faer::dyn_stack::PodStack;
 use reborrow::*;
 use faer::utils::simd::Write;
 use num_traits::Float;
@@ -63,36 +66,78 @@ impl <T> fmt::Display for GmresError<T>
     }
 }
 
-pub trait LinOp<T>
-    where
-    T: faer::RealField + Float
-{
-    fn apply_linop_to_vec(&self, target: MatMut<T>);
-}
+// pub trait LinOp<T>
+//     where
+//     T: faer::RealField + Float
+// {
+//     fn apply_linop_to_vec(&self, target: MatRef<T>) -> Mat<T>;
+// }
 
-#[derive(Clone)]
+#[derive(Clone,Debug)]
 pub struct JacobiPreconLinOp<'a, T>
     where
     T: faer::RealField + Float
 {
     m: SparseColMatRef<'a, usize, T>,
 }
+
 impl <'a, T> LinOp<T> for JacobiPreconLinOp<'a, T>
     where
     T: faer::RealField + Float + faer::SimpleEntity
 {
-    fn apply_linop_to_vec(&self, mut target: MatMut<T>) {
+    fn apply_req(
+            &self,
+            rhs_ncols: usize,
+            parallelism: Parallelism,
+        ) -> Result<faer::dyn_stack::StackReq, faer::dyn_stack::SizeOverflow> {
+        let _ = parallelism;
+        let _ = rhs_ncols;
+        Ok(faer::dyn_stack::StackReq::empty())
+    }
+
+    fn nrows(&self) -> usize {
+        self.m.nrows()
+    }
+
+    fn ncols(&self) -> usize {
+        self.m.ncols()
+    }
+
+    fn apply(
+        &self,
+        out: MatMut<T>,
+        rhs: MatRef<T>,
+        parallelism: Parallelism,
+        stack: PodStack<'_>,
+        )
+    {
+        // unused
+        _ = parallelism;
+        _ = stack;
+
+        let mut out = out;
         let eps = T::from(1e-12).unwrap();
         let one_c = T::from(1.0).unwrap();
-        let zero_c = T::from(0.0).unwrap();
-        for i in 0..target.nrows()
+        for i in 0..rhs.nrows()
         {
-            let v = target.read(i, 0);
-            target.write(i, 0,
+            let v = rhs.read(i, 0);
+            out.write(i, 0,
                  v * (one_c / (*self.m.as_ref().get(i, i).unwrap_or(&one_c) + eps) ));
         }
     }
+
+    fn conj_apply(
+            &self,
+            out: MatMut<'_, T>,
+            rhs: MatRef<'_, T>,
+            parallelism: Parallelism,
+            stack: PodStack<'_>,
+        ) {
+        // Not implented error!
+        panic!("Not Implemented");
+    }
 }
+
 impl <'a, T> JacobiPreconLinOp <'a, T>
     where
     T: faer::RealField + Float
@@ -104,7 +149,6 @@ impl <'a, T> JacobiPreconLinOp <'a, T>
     }
 }
 
-
 /// Calculate the givens rotation matrix
 fn givens_rotation<T>(v1: T, v2: T) -> (T, T)
     where
@@ -165,7 +209,12 @@ fn arnoldi<'a, T>(
 
     // Apply left preconditioner if supplied
     match m {
-        Some(m) => m.apply_linop_to_vec(qv.as_mut()),
+        Some(m) => {
+            let mut _dummy_podstack: [u8;1] = [0u8;1];
+            let mut lp_out = faer::Mat::zeros(qv.nrows(), qv.ncols());
+            m.apply(lp_out.as_mut(), qv.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
+            qv = lp_out;
+        },
         _ => {}
     }
 
@@ -197,8 +246,14 @@ pub fn gmres<'a, T>(
 {
     // compute initial residual
     let mut r = b - a * x.as_ref();
+
     match &m {
-        Some(m) => (&m).apply_linop_to_vec(r.as_mut()),
+        Some(m) => {
+            let mut _dummy_podstack: [u8;1] = [0u8;1];
+            let mut lp_out = faer::Mat::zeros(r.nrows(), r.ncols());
+            (&m).apply(lp_out.as_mut(), r.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
+            r = lp_out;
+        },
         _ => {}
     }
 

From d6b843051c1ca2200686bd7f3586908d2336cf1d Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Wed, 7 Aug 2024 23:52:54 -0400
Subject: [PATCH 6/7] refactor gmres to accept general faer linop as input

---
 src/lib.rs | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 018c499..2dbdd68 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -66,13 +66,6 @@ impl <T> fmt::Display for GmresError<T>
     }
 }
 
-// pub trait LinOp<T>
-//     where
-//     T: faer::RealField + Float
-// {
-//     fn apply_linop_to_vec(&self, target: MatRef<T>) -> Mat<T>;
-// }
-
 #[derive(Clone,Debug)]
 pub struct JacobiPreconLinOp<'a, T>
     where
@@ -189,8 +182,8 @@ fn apply_givens_rotation<T>(h: &mut Vec<T>, cs: &mut Vec<T>, sn: &mut Vec<T>, k:
 /// * `m`- An optional preconditioner that is applied to the original system such that
 ///        the new krylov subspace built is [M^{-1}k, M^{-1}Ak, M^{-1}A^2k, ...].
 ///        If None, no preconditioner is applied.
-fn arnoldi<'a, T>(
-    a: SparseColMatRef<'a, usize, T>,
+fn arnoldi<'a, T, Lop: LinOp<T>>(
+    a: &Lop,
     q: &Vec<Mat<T>>,
     k: usize,
     m: Option<&dyn LinOp<T>>
@@ -198,19 +191,22 @@ fn arnoldi<'a, T>(
     where
     T: faer::RealField + Float
 {
+    // unused in faer LinOp apply() method
+    let mut _dummy_podstack: [u8;1] = [0u8;1];
+
     // Krylov vector
     let q_col: MatRef<T> = q[k].as_ref();
 
     // let mut qv: Mat<f64> = a * q_col;
     // parallel version of above
     let mut qv: Mat<T> = faer::Mat::zeros(q_col.nrows(), 1);
-    linalg::matmul::sparse_dense_matmul(
-        qv.as_mut(), a.as_ref(), q_col.as_ref(), None, T::from(1.0).unwrap(), faer::get_global_parallelism());
+    //linalg::matmul::sparse_dense_matmul(
+    //    qv.as_mut(), a.as_ref(), q_col.as_ref(), None, T::from(1.0).unwrap(), faer::get_global_parallelism());
+    a.apply(qv.as_mut(), q_col.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
 
     // Apply left preconditioner if supplied
     match m {
         Some(m) => {
-            let mut _dummy_podstack: [u8;1] = [0u8;1];
             let mut lp_out = faer::Mat::zeros(qv.nrows(), qv.ncols());
             m.apply(lp_out.as_mut(), qv.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
             qv = lp_out;
@@ -233,8 +229,8 @@ fn arnoldi<'a, T>(
 
 
 /// Generalized minimal residual method
-pub fn gmres<'a, T>(
-    a: SparseColMatRef<'a, usize, T>,
+pub fn gmres<'a, T, Lop: LinOp<T>>(
+    a: Lop,
     b: MatRef<T>,
     mut x: MatMut<T>,
     max_iter: usize,
@@ -245,11 +241,14 @@ pub fn gmres<'a, T>(
     T: faer::RealField + Float
 {
     // compute initial residual
-    let mut r = b - a * x.as_ref();
+    // let mut a_x = a * x.as_ref();
+    let mut _dummy_podstack: [u8;1] = [0u8;1];
+    let mut a_x = faer::Mat::zeros(b.nrows(), b.ncols());
+    a.apply(a_x.as_mut(), x.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
+    let mut r = b - a_x;
 
     match &m {
         Some(m) => {
-            let mut _dummy_podstack: [u8;1] = [0u8;1];
             let mut lp_out = faer::Mat::zeros(r.nrows(), r.ncols());
             (&m).apply(lp_out.as_mut(), r.as_ref(), faer::get_global_parallelism(), PodStack::new(&mut _dummy_podstack));
             r = lp_out;
@@ -277,7 +276,7 @@ pub fn gmres<'a, T>(
 
     let mut k_iters = 0;
     for k in 0..max_iter {
-        let (mut hk, qk) = arnoldi(a, &qs, k, m);
+        let (mut hk, qk) = arnoldi(&a, &qs, k, m);
         apply_givens_rotation(&mut hk, &mut cs, &mut sn, k);
         hs.push(hk);
         qs.push(qk);
@@ -328,8 +327,8 @@ pub fn gmres<'a, T>(
 }
 
 /// Restarted Generalized minimal residual method
-pub fn restarted_gmres<'a, T>(
-    a: SparseColMatRef<'a, usize, T>,
+pub fn restarted_gmres<'a, T, Lop: LinOp<T>>(
+    a: Lop,
     b: MatRef<T>,
     mut x: MatMut<T>,
     max_iter_inner: usize,
@@ -345,7 +344,7 @@ pub fn restarted_gmres<'a, T>(
     let mut iters = 0;
     for _ko in 0..max_iter_outer {
         let res = gmres(
-            a.as_ref(), b.as_ref(), x.rb_mut(),
+            &a, b.as_ref(), x.rb_mut(),
             max_iter_inner, threshold, m);
         match res {
             // done

From 7a4262a1ea17cc58f943d8c31858a777f2bfda43 Mon Sep 17 00:00:00 2001
From: William Gurecky <william.gurecky@gmail.com>
Date: Wed, 7 Aug 2024 23:58:13 -0400
Subject: [PATCH 7/7] update readme for new interface changes

---
 readme.md | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/readme.md b/readme.md
index d6e4d8a..074367e 100644
--- a/readme.md
+++ b/readme.md
@@ -6,7 +6,7 @@ About
 
 GMRES in rust using [faer](https://github.com/sarah-ek/faer-rs).
 
-Solves linear systems of the form: Ax=b, where A is sparse.  Depends on faer for sparse matrix implementation and sparse QR solver.
+Solves linear systems of the form: Ax=b, where A is sparse.  Depends on faer for sparse matrix implementation.
 
 Use
 ===
@@ -36,15 +36,16 @@ Example use:
         ];
 
     // init sol guess
-    let x0 = faer::mat![
+    // Note: x is modified in-place, the result is stored in x
+    let mut x = faer::mat![
         [0.0],
         [0.0],
         [0.0],
         ];
 
     // the final None arg means do not apply left preconditioning
-    let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 10, 1e-8, None).unwrap();
-    println!("Result x: {:?}", res_x);
+    let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x.as_mut(), 10, 1e-8, None).unwrap();
+    println!("Result x: {:?}", x);
     println!("Error x: {:?}", err);
     println!("Iters : {:?}", iters);
 
@@ -55,7 +56,7 @@ A preconditioner can be supplied:
     // continued from above...
     use faer_gmres::{JacobiPreconLinOp};
     let jacobi_pre = JacobiPreconLinOp::new(a_test.as_ref());
-    let (res_x, err, iters) = gmres(a_test.as_ref(), b.as_ref(), x0.as_ref(), 10, 1e-8, Some(&jacobi_pre)).unwrap();
+    let (err, iters) = gmres(a_test.as_ref(), b.as_ref(), x.as_mut(), 10, 1e-8, Some(&jacobi_pre)).unwrap();
 
 ## Restarted GMRES:
 
@@ -64,8 +65,8 @@ A restarted GMRES routine is provided:
     use faer_gmres::restarted_gmres;
     let max_inner = 30;
     let max_outer = 50;
-    let (res_x, err, iters) = restarted_gmres(
-        a_test.as_ref(), b.as_ref(), x0.as_ref(), max_inner, max_outer, 1e-8, None).unwrap();
+    let (err, iters) = restarted_gmres(
+        a_test.as_ref(), b.as_ref(), x.as_mut(), max_inner, max_outer, 1e-8, None).unwrap();
 
 This will repeatedly call the inner GMRES routine, using the previous outer iteration's solution as the inital guess for the next outer solve.  The current implementation of restarted GMRES in this package can reduce the memory requirements needed, but slow convergence.