diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..ecfd1c7
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,19 @@
+---
+Language:                 Cpp
+BasedOnStyle:             Google
+
+ColumnLimit:              100
+
+BreakBeforeBraces:        Custom
+BraceWrapping:
+  AfterClass:             true
+  AfterControlStatement:  false
+  AfterEnum:              true
+  AfterFunction:          true
+  AfterNamespace:         false
+  AfterObjCDeclaration:   true
+  AfterStruct:            true
+  AfterUnion:             true
+  BeforeCatch:            false
+  BeforeElse:             false
+  IndentBraces:           false
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..cdb7f77
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,22 @@
+root = true
+
+[*]
+indent_style = space
+indent_size = 2
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = false
+max_line_length = 100
+
+[*.py]
+indent_size = 4
+
+[Makefile]
+indent_style = tab
+
+[CMakeLists.txt]
+indent_size = 2
+
+[*.{json,yml,yaml}]
+indent_size = 2
diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml
index 35e4cc0..152aa8f 100644
--- a/.github/workflows/default.yml
+++ b/.github/workflows/default.yml
@@ -15,38 +15,37 @@ jobs:
 
     steps:
 
-    - name: Checkout repository
-      uses: actions/checkout@v4
-
-    - name: Install dependencies
-      run: |
-        sudo apt-get install -y build-essential python-dev-is-python3 python3-numpy python3-pytest
-
-    - name: Build library
-      run: make PYTHON=python
-
-    - name: Run library tests
-      run: make test PYTHON=python
-
-    - name: Copy library to docs/ directory
-      run: cp -r permanent/ docs/permanent/
-
-    - name: Build documentation
-      uses: ammaraskar/sphinx-action@master
-      with:
-        build-command: "make html"
-        docs-folder: "docs/"
-
-    - name: Fix documentation permissions
-      run: |
-        chmod -c -R +rX "docs/build/html/" | while read line; do
-          echo "::warning title=Invalid file permissions automatically fixed::$line"
-        done
-
-    - name: Upload Pages artifact
-      uses: actions/upload-pages-artifact@v3
-      with:
-        path: docs/build/html/
+      - uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: sudo apt-get install -y build-essential cmake
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - run: pip install .
+
+      - name: PyTest
+        run: |
+          pip install pytest
+          pytest -v tests
+
+      - name: Build documentation
+        run: |
+          pip install sphinx sphinx_rtd_theme sphinx_copybutton
+          cd docs && make html
+
+      - name: Fix documentation permissions
+        run: |
+          chmod -c -R +rX "docs/build/html/" | while read line; do
+            echo "::warning title=Invalid file permissions automatically fixed::$line"
+          done
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs/build/html/
 
   deploy:
 
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 10deae7..4d57422 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -10,35 +10,34 @@ jobs:
 
     steps:
 
-    - name: Checkout repository
-      uses: actions/checkout@v4
-
-    - name: Install dependencies
-      run: |
-        sudo apt-get install -y build-essential python-dev-is-python3 python3-numpy python3-pytest
-
-    - name: Build library
-      run: make PYTHON=python
-
-    - name: Run library tests
-      run: make test PYTHON=python
-
-    - name: Copy library to docs/ directory
-      run: cp -r permanent/ docs/permanent/
-
-    - name: Build documentation
-      uses: ammaraskar/sphinx-action@master
-      with:
-        build-command: "make html"
-        docs-folder: "docs/"
-
-    - name: Fix documentation permissions
-      run: |
-        chmod -c -R +rX "docs/build/html/" | while read line; do
-          echo "::warning title=Invalid file permissions automatically fixed::$line"
-        done
-
-    - name: Upload Pages artifact
-      uses: actions/upload-pages-artifact@v3
-      with:
-        path: docs/build/html/
+      - uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: sudo apt-get install -y build-essential cmake
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - run: pip install .
+
+      - name: PyTest
+        run: |
+          pip install pytest
+          pytest -v tests
+
+      - name: Build documentation
+        run: |
+          pip install sphinx sphinx_rtd_theme sphinx_copybutton
+          cd docs && make html
+
+      - name: Fix documentation permissions
+        run: |
+          chmod -c -R +rX "docs/build/html/" | while read line; do
+            echo "::warning title=Invalid file permissions automatically fixed::$line"
+          done
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs/build/html/
diff --git a/.gitignore b/.gitignore
index 0cbe245..7bd2e46 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,7 +84,8 @@ tags
 .vscode/
 
 # Project files
-compile_flags.txt
-src/tuning
-src/tuning.h
-src/tuning.csv
+/include/permanent/tuning.h
+
+# Language server files
+compile_commands.json
+.ccls-cache
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bc17c38..a64bf66 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,8 +52,8 @@ repos:
     hooks:
       - id: cppcheck
         args: ["--inline-suppr", "--suppress=missingIncludeSystem",
-          "--suppress=*:src/swap.h", "--suppress=*:src/perm-mv0.h", "--suppress=*:src/kperm-gray.h"]
+          "--suppress=*:include/permanent/swap.h", "--suppress=*:include/permanent/perm-mv0.h", "--suppress=*:include/permanent/kperm-gray.h"]
       - id: cpplint
         args: ["--root=src", "--filter=-build/include_subdir"]
       - id: clang-format
-        args: ["-i", "-style={BasedOnStyle: google, IndentWidth: 4}"]
+        args: ["-i", "-style=file"]
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..1ecd2c0
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,186 @@
+# Supported CMake versions
+
+cmake_minimum_required(VERSION 3.23 FATAL_ERROR)
+
+# Project configuration
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+project(MatrixPermanent VERSION 0.0.1 LANGUAGES CXX)
+
+if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
+  message(
+    FATAL_ERROR
+      "In-source builds not allowed; make a build directory and run CMake from it\n")
+endif()
+
+if(ENV{PERMANENT_TUNE})
+  set(PERMANENT_TUNE ON)
+endif()
+
+if(SKBUILD OR ENV{PERMANENT_PYTHON})
+  set(PERMANENT_PYTHON ON)
+endif()
+
+# Dependencies
+
+if(PROJECT_IS_TOP_LEVEL AND PERMANENT_PYTHON)
+  find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
+elseif(PERMANENT_TUNE)
+  find_package(Python REQUIRED COMPONENTS Interpreter)
+endif()
+
+# Tuning header target
+
+set(PERMANENT_TUNING_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/include/permanent/tuning.h")
+set(PERMANENT_TUNING_DATA "${CMAKE_BINARY_DIR}/tuning.csv")
+
+if(PERMANENT_TUNE)
+  add_executable(PermanentTune src/tuning.cc)
+  target_include_directories(PermanentTune PRIVATE include)
+
+  add_custom_target(TuningData ALL
+    COMMAND
+      PermanentTune "${PERMANENT_TUNING_DATA}"
+    BYPRODUCTS
+      "${PERMANENT_TUNING_DATA}")
+
+  add_custom_target(TuningHeader ALL
+    COMMAND
+    ${Python_EXECUTABLE}
+        "${CMAKE_CURRENT_SOURCE_DIR}/tools/generate_tuning_header.py"
+        "${PERMANENT_TUNING_DATA}"
+        "${PERMANENT_TUNING_HEADER}"
+    BYPRODUCTS
+      "${PERMANENT_TUNING_HEADER}")
+  add_dependencies(TuningHeader TuningData)
+else()
+  add_custom_target(TuningHeader ALL
+    COMMAND
+      cp "-f"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/permanent/tuning.default.h"
+        "${PERMANENT_TUNING_HEADER}"
+    BYPRODUCTS
+      "${PERMANENT_TUNING_HEADER}")
+endif()
+
+# Header library target
+
+add_library(permanent_headers INTERFACE)
+add_library(MatrixPermanent::headers ALIAS permanent_headers)
+add_dependencies(permanent_headers TuningHeader)
+target_include_directories(permanent_headers
+  INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+
+if(PROJECT_IS_TOP_LEVEL)
+  set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE")
+  include(GNUInstallDirs)
+  include(CMakePackageConfigHelpers)
+  include(CPack)
+  write_basic_package_version_file(
+    "${PROJECT_NAME}ConfigVersion.cmake"
+    VERSION
+        ${PROJECT_VERSION}
+    COMPATIBILITY
+        SameMajorVersion)
+  configure_package_config_file(
+    "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in"
+    "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+    INSTALL_DESTINATION
+      ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
+
+  install(TARGETS permanent_headers
+    EXPORT ${PROJECT_NAME}_Targets
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  install(
+    EXPORT
+      ${PROJECT_NAME}_Targets
+    FILE
+      ${PROJECT_NAME}Targets.cmake
+    NAMESPACE
+      ${PROJECT_NAME}::
+    DESTINATION
+      ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
+
+  install(
+    FILES
+      "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+      "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
+    DESTINATION
+      ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
+
+  install(
+    DIRECTORY
+      ${PROJECT_SOURCE_DIR}/include/permanent
+    DESTINATION
+      include)
+endif()
+
+# Python library target
+
+if(PROJECT_IS_TOP_LEVEL AND PERMANENT_PYTHON)
+  execute_process(
+    COMMAND
+      sh -c "${CMAKE_CXX_COMPILER} --version | head -n 1"
+    WORKING_DIRECTORY
+      ${PROJECT_SOURCE_DIR}
+    OUTPUT_VARIABLE
+      _PERMANENT_COMPILER_VERSION
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  execute_process(
+    COMMAND
+      git rev-parse --abbrev-ref HEAD
+    WORKING_DIRECTORY
+      ${PROJECT_SOURCE_DIR}
+    OUTPUT_VARIABLE
+      _PERMANENT_GIT_BRANCH
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  execute_process(
+    COMMAND
+      git log -1 --format=%h
+    WORKING_DIRECTORY
+      ${PROJECT_SOURCE_DIR}
+    OUTPUT_VARIABLE
+      _PERMANENT_GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  execute_process(
+    COMMAND
+    ${Python_EXECUTABLE} "-c" "import numpy; print(numpy.get_include())"
+    OUTPUT_VARIABLE
+      NumPy_INCLUDE_DIRS
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  python_add_library(permanent MODULE src/main.cc WITH_SOABI)
+  target_include_directories(permanent
+    PRIVATE
+      include
+      ${NumPy_INCLUDE_DIRS})
+  target_compile_definitions(permanent
+    PRIVATE
+      "_PERMANENT_VERSION=${PROJECT_VERSION}"
+      "_PERMANENT_COMPILER_VERSION=${_PERMANENT_COMPILER_VERSION}"
+      "_PERMANENT_GIT_BRANCH=${_PERMANENT_GIT_BRANCH}"
+      "_PERMANENT_GIT_COMMIT_HASH=${_PERMANENT_GIT_COMMIT_HASH}"
+  )
+  if (UNIX)
+    if (APPLE)
+      set_target_properties(permanent
+        PROPERTIES
+          LINK_FLAGS "-Wl,-dylib,-undefined,dynamic_lookup")
+    else()
+      set_target_properties(permanent
+        PROPERTIES
+          LINK_FLAGS "-Wl,--allow-shlib-undefined")
+    endif()
+  endif()
+  install(TARGETS permanent DESTINATION .)
+endif()
diff --git a/Makefile b/Makefile
index d5fcff2..213f262 100644
--- a/Makefile
+++ b/Makefile
@@ -1,89 +1,34 @@
-CXX    ?= g++
-AR     ?= ar
-PYTHON ?= python3
+PREFIX ?= /usr/local
 
-CXXFLAGS := -std=c++17 -Wall -Wextra -g -fPIC -O3
-
-ifeq ($(shell uname -s),Darwin)
-CXXFLAGS += -undefined dynamic_lookup
-endif
-
-ifneq ($(BUILD_NATIVE),)
-CXXFLAGS += -march=native -mtune=native
+CMAKE_FLAGS :=
+CMAKE_FLAGS += -DCMAKE_INSTALL_PREFIX=$(DESTDIR)$(PREFIX)
+ifdef PERMANENT_TUNE
+CMAKE_FLAGS += -DPERMANENT_TUNE=ON
 endif
-
-ifneq ($(RUN_TUNING),)
-CXXFLAGS += -DRUN_TUNING=1
-endif
-
-ifeq ($(PREFIX),)
-PREFIX := /usr/local
+ifdef PERMANENT_PYTHON
+CMAKE_FLAGS += -DPERMANENT_PYTHON=ON
+CMAKE_FLAGS += -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
 endif
 
-# Build C and Python libraries
-.PHONY: all
-all: libpermanent.a libpermanent.so permanent/permanent.so
+CLEAN_TARGETS :=
+CLEAN_TARGETS += include/permanent/tuning.h
+CLEAN_TARGETS += build dist _build _generate
+CLEAN_TARGETS += permanent.*egg-info permanent.*so
+CLEAN_TARGETS += compile_commands.json
 
-# Build C libraries
-.PHONY: c
-c: libpermanent.a libpermanent.so
+.PHONY: all clean install _build
 
-# Build Python libraries
-.PHONY: python
-python: permanent/permanent.so
+all: _build
 
-# Install C libraries
-.PHONY: install
-install: src/tuning.h libpermanent.a libpermanent.so
-	install -d $(DESTDIR)$(PREFIX)/lib/
-	install -m 444 libpermanent.a $(DESTDIR)$(PREFIX)/lib/
-	install -m 555 libpermanent.so $(DESTDIR)$(PREFIX)/lib/
-	install -d $(DESTDIR)$(PREFIX)/include/src/
-	install -m 444 src/permanent.h $(DESTDIR)$(PREFIX)/include/src/
-	install -m 444 src/tuning.h $(DESTDIR)$(PREFIX)/include/src/
-
-# Run tests
-.PHONY: test
-test: permanent/permanent.so
-	$(PYTHON) -m pytest -v .
-
-# Clean directory
-.PHONY: clean
 clean:
-	rm -f src/tuning src/tuning.h src/tuning.csv
-	rm -f src/libpermanent.o permanent/permanent.so libpermanent.a libpermanent.so
-
-# compile_flags.txt (clangd)
-compile_flags.txt:
-	echo "$(CXXFLAGS)" | sed 's/ /\n/g' > $@
-
-# Find tuning parameters
-src/tuning.h: src/permanent.h src/permanent.cc src/tuning.cc tools/tuning.py
-	$(CXX) $(CXXFLAGS) -o src/tuning src/permanent.cc src/tuning.cc
-	@if [ -n "$(RUN_TUNING)" ]; then \
-		echo "running tuning..."; \
-		src/tuning; \
-		echo "writing custom tuning.h"; \
-		$(PYTHON) tools/tuning.py; \
-	else \
-		echo "writing default tuning.h"; \
-		src/tuning; \
-	fi
-
-# Compile Python library
-permanent/permanent.so: src/tuning.h src/permanent.h src/permanent.cc src/py_permanent.cc tools/include_dirs.py
-	$(CXX) $(CXXFLAGS) -DWITH_TUNING_FILE=1 \
-		$(shell $(PYTHON) tools/include_dirs.py) \
-		-shared -o $@ src/permanent.cc src/py_permanent.cc
+	rm -rf $(CLEAN_TARGETS)
 
-# Compile object code
-src/libpermanent.o: src/tuning.h src/permanent.h src/permanent.cc
-	$(CXX) $(CXXFLAGS) -DWITH_TUNING_FILE=1 -c -o $@ src/permanent.cc
+install: _build
+	@cmake --install build
 
-# Compile static C library
-libpermanent.a: src/libpermanent.o
-	$(AR) crs $@ $^
+_build: build
+	@cmake --build build --target all
+	@test -e build/compile_commands.json && ln -sf build/compile_commands.json .
 
-# Compile shared C library
-libpermanent.so: src/libpermanent.o
-	$(CXX) $(CXXFLAGS) -shared -o $@ $^
+build:
+	@cmake -B build $(CMAKE_FLAGS) .
diff --git a/cmake/MatrixPermanentConfig.cmake.in b/cmake/MatrixPermanentConfig.cmake.in
new file mode 100644
index 0000000..9c15f36
--- /dev/null
+++ b/cmake/MatrixPermanentConfig.cmake.in
@@ -0,0 +1,4 @@
+@PACKAGE_INIT@
+
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
+check_required_components("@PROJECT_NAME@")
diff --git a/include/permanent.h b/include/permanent.h
new file mode 100644
index 0000000..60e685d
--- /dev/null
+++ b/include/permanent.h
@@ -0,0 +1,13 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_h_)
+#define permanent_h_
+
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/combinatoric.h>
+#include <permanent/glynn.h>
+#include <permanent/ryser.h>
+#include <permanent/opt.h>
+
+#endif  // permanent_h_
diff --git a/include/permanent/combinatoric.h b/include/permanent/combinatoric.h
new file mode 100644
index 0000000..a367067
--- /dev/null
+++ b/include/permanent/combinatoric.h
@@ -0,0 +1,61 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_combinatoric_h_)
+#define permanent_combinatoric_h_
+
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/kperm-gray.h>
+#include <permanent/perm-mv0.h>
+
+namespace permanent {
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  perm_mv0 permutations(m);
+
+  result_t<T, I> out = 0;
+  const size_t *perm = permutations.data();
+  do {
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= ptr[i * m + perm[i]];
+    }
+    out += prod;
+  } while (permutations.next());
+
+  return out;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  kperm_gray permutations(n);
+  permutations.first(m);
+
+  result_t<T, I> out = 0;
+  const size_t *perm = permutations.data();
+  do {
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= ptr[i * n + perm[i]];
+    }
+    out += prod;
+  } while (permutations.next());
+
+  return out;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric(const size_t m, const size_t n, const T *ptr)
+{
+  return (m == n) ? combinatoric_square<T, I>(m, n, ptr)
+                  : combinatoric_rectangular<T, I>(m, n, ptr);
+}
+
+}  // namespace permanent
+
+#endif  // permanent_combinatoric_h_
diff --git a/include/permanent/common.h b/include/permanent/common.h
new file mode 100644
index 0000000..7654964
--- /dev/null
+++ b/include/permanent/common.h
@@ -0,0 +1,54 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_common_h_)
+#define permanent_common_h_
+
+#include <complex>
+#include <cstdint>
+#include <type_traits>
+
+namespace permanent {
+
+// Numerical types
+
+using size_t = std::size_t;
+
+using sgn_t = std::int_fast8_t;
+
+// Static dispatcher for result type
+
+template <typename Type, typename IntType = void, typename = void>
+struct _result_t;
+
+template <typename Type>
+struct _result_t<Type, void, std::enable_if_t<std::is_scalar_v<Type>, void>>
+{
+  typedef double type;
+};
+
+template <typename Type>
+struct _result_t<std::complex<Type>, void, std::enable_if_t<std::is_scalar_v<Type>, void>>
+{
+  typedef std::complex<double> type;
+};
+
+template <typename Type, typename IntType>
+struct _result_t<Type, IntType,
+                 std::enable_if_t<std::is_integral_v<Type> && std::is_integral_v<IntType>, void>>
+{
+  typedef IntType type;
+};
+
+template <typename Type, typename IntType>
+struct _result_t<std::complex<Type>, std::complex<IntType>,
+                 std::enable_if_t<std::is_integral_v<Type> && std::is_integral_v<IntType>, void>>
+{
+  typedef std::complex<IntType> type;
+};
+
+template <typename Type, typename IntType = void>
+using result_t = _result_t<Type, IntType>::type;
+
+}  // namespace permanent
+
+#endif  // permanent_common_h_
diff --git a/include/permanent/complex.h b/include/permanent/complex.h
new file mode 100644
index 0000000..a6e5f7d
--- /dev/null
+++ b/include/permanent/complex.h
@@ -0,0 +1,44 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(PERMANENT_COMPLEX_H_)
+#define PERMANENT_COMPLEX_H_
+
+#include <complex>
+
+namespace permanent {
+
+// Allow type promotion to complex
+
+template <typename T>
+struct _identity_t
+{
+  typedef T type;
+};
+
+template <typename T>
+using identity_t = _identity_t<T>::type;
+
+}  // namespace permanent
+
+#define DEF_COMPLEX_OPS(OP)                                                                \
+                                                                                           \
+  template <typename T>                                                                    \
+  std::complex<T> operator OP(const std::complex<T> &x, const permanent::identity_t<T> &y) \
+  {                                                                                        \
+    return x OP y;                                                                         \
+  }                                                                                        \
+                                                                                           \
+  template <typename T>                                                                    \
+  std::complex<T> operator OP(const permanent::identity_t<T> &x, const std::complex<T> &y) \
+  {                                                                                        \
+    return x OP y;                                                                         \
+  }
+
+DEF_COMPLEX_OPS(+)
+DEF_COMPLEX_OPS(-)
+DEF_COMPLEX_OPS(*)
+DEF_COMPLEX_OPS(/)
+
+#undef DEF_COMPLEX_OPS
+
+#endif  // PERMANENT_COMPLEX_H_
diff --git a/include/permanent/glynn.h b/include/permanent/glynn.h
new file mode 100644
index 0000000..4969f7e
--- /dev/null
+++ b/include/permanent/glynn.h
@@ -0,0 +1,367 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_glynn_h_)
+#define permanent_glynn_h_
+
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/tables.h>
+
+namespace permanent {
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  /* Fill delta array (all +1 to start), and permutation array with [0...m]. */
+
+  sgn_t delta[64];
+  size_t perm[64 + 1];
+  for (size_t i = 0; i < m; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  /* Handle first permutation. */
+
+  // result_t<T, I> vec[64];
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j < m; ++j) {
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i < m; ++i) {
+      sum += ptr[i * m + j] * delta[i];
+    }
+    out *= sum;
+    // vec[j] = sum;
+  }
+  // for (size_t j = 0; j < m; ++j) {
+  //     out *= vec[j];
+  // }
+
+  /* Iterate from the second to the final permutation. */
+
+  std::size_t bound = m - 1;
+  std::size_t pos = 0;
+  sgn_t sign = 1;
+  while (pos != bound) {
+    /* Update sign and delta. */
+
+    sign *= -1;
+    delta[bound - pos] *= -1;
+
+    /* Compute term. */
+
+    result_t<T, I> prod = 1;
+    for (size_t j = 0; j < m; ++j) {
+      result_t<T, I> sum = 0;
+      for (size_t i = 0; i < m; ++i) {
+        sum += ptr[i * m + j] * delta[i];
+      }
+      // vec[j] = sum;
+      prod *= sum;
+    }
+
+    /* Multiply by the product of the vectors in delta. */
+
+    // for (size_t i = 0; i < m; ++i) {
+    //     prod *= vec[i];
+    // }
+    out += sign * prod;
+
+    /* Go to next permutation. */
+
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+  }
+
+  /* Divide by external factor and return permanent. */
+
+  return out / (1UL << bound);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  /* Fill delta array (all +1 to start), and permutation array with [0...n].
+   */
+
+  sgn_t delta[64];
+  size_t perm[64 + 1];
+  for (size_t i = 0; i < n; ++i) {
+    delta[i] = 1;
+    perm[i] = i;
+  }
+
+  /* Handle first permutation. */
+
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j < n; ++j) {
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i < m; ++i) {
+      sum += ptr[i * n + j] * delta[i];
+    }
+    for (size_t k = m; k < n; ++k) {
+      sum += delta[k];
+    }
+    // vec[j] = sum;
+    out *= sum;
+  }
+  // for (i = 0; i < n; ++i) {
+  //     out *= vec[i];
+  // }
+
+  /* Iterate from the second to the final permutation. */
+
+  size_t pos = 0;
+  size_t bound = n - 1;
+  sgn_t sign = 1;
+  while (pos != bound) {
+    /* Update sign and delta. */
+
+    sign *= -1;
+    delta[bound - pos] *= -1;
+
+    /* Compute term. */
+
+    result_t<T, I> prod = 1;
+    for (size_t j = 0; j < n; ++j) {
+      result_t<T, I> sum = 0;
+      for (size_t i = 0; i < m; ++i) {
+        sum += ptr[i * n + j] * delta[i];
+      }
+
+      for (size_t k = m; k < n; ++k) {
+        sum += delta[k];
+      }
+      prod *= sum;
+    }
+
+    /* Multiply by the product of the vectors in delta. */
+
+    // T prod = 1.0;
+    // for (i = 0; i < n; ++i) {
+    //     prod *= vec[i];
+    // }
+    out += sign * prod;
+
+    /* Go to next permutation. */
+
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+  }
+
+  /* Divide by external factor and return permanent. */
+
+  return (out / (1UL << bound)) / factorial<result_t<T, I>>(n - m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_square1(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != m; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j != m; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i != m; ++i) {
+      sum += ptr[i * m + j];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = m - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    for (size_t i = 0; i != m; ++i) {
+      vec[i] += ptr[idx * m + i] * delta[idx] * 2;
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return out / (1UL << bound);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_rectangular1(const size_t m, const size_t n, const T *ptr)
+{
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != n; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = n * (n - m);
+  for (size_t j = 0; j != m; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i != n; ++i) {
+      sum += ptr[j * n + i];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+  for (size_t j = m; j != n; ++j) {
+    vec[j] = n;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = n - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    for (size_t i = 0; i != m; ++i) {
+      vec[i] += ptr[i * n + idx] * delta[idx] * 2;
+    }
+    for (size_t i = m; i != n; ++i) {
+      vec[i] += delta[idx] * 2;
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != n; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return (out / (1UL << bound)) / factorial<result_t<T, I>>(n - m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_rectangular2(const size_t m, const size_t n, const T *ptr)
+{
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != n; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j != n; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = n - m;
+    for (size_t i = 0; i != m; ++i) {
+      sum += ptr[i * n + j];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = n - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    if (idx < m) {
+      for (size_t i = 0; i != n; ++i) {
+        vec[i] += ptr[idx * n + i] * delta[idx] * 2;
+      }
+    } else {
+      for (size_t i = 0; i != n; ++i) {
+        vec[i] += delta[idx] * 2;
+      }
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != n; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return (out / (1UL << bound)) / factorial<result_t<T, I>>(n - m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn(const size_t m, const size_t n, const T *ptr)
+{
+  return (m == n) ? glynn_square<T, I>(m, n, ptr) : glynn_rectangular<T, I>(m, n, ptr);
+}
+
+}  // namespace permanent
+
+#endif  // permanent_glynn_h_
diff --git a/include/permanent/kperm-gray.h b/include/permanent/kperm-gray.h
new file mode 100644
index 0000000..fa33baa
--- /dev/null
+++ b/include/permanent/kperm-gray.h
@@ -0,0 +1,152 @@
+#if !defined HAVE_KPERM_GRAY_H__
+#define HAVE_KPERM_GRAY_H__
+// This file is part of the FXT library.
+// Copyright (C) 2010, 2012, 2013, 2014, 2019, 2023 Joerg Arndt
+// License: GNU General Public License version 3 or later,
+// see the file COPYING.txt in the main directory.
+
+#include <cstdlib>
+
+#include "swap.h"
+
+class kperm_gray
+// Gray code for k-permutations of n elements.
+// Same as: k-prefixes of permutations of n elements.
+// Same as: arrangements of k out of n elements.
+// CAT algorithm based on mixed radix Gray code
+//   for the factorial number system (falling base).
+{
+ protected:
+  std::size_t d_[64];      // mixed radix digits with radix = [n-1, n-2, ..., 2]
+  std::size_t i_[64];      // directions
+  std::size_t ix_[64];     // permutation (inverse perms in Trotter's order)
+  std::size_t x_[64];      // inverse permutation (perms in Trotter's order)
+  std::size_t n_;          // total of n elements
+  std::size_t k_;          // prefix length: permutations of k elements
+  std::size_t sw1_, sw2_;  // indices of elements swapped most recently
+
+  kperm_gray(const kperm_gray &) = delete;
+  kperm_gray &operator=(const kperm_gray &) = delete;
+
+ public:
+  explicit kperm_gray(std::size_t n)
+  {
+    n_ = n;
+    k_ = n;
+    // d_ = new std::size_t[n_];
+    d_[n - 1] = -1UL;  // sentinel
+    // i_ = new std::size_t[n_];
+    // x_ = new std::size_t[n_];
+    // ix_ = new std::size_t[n_];
+    i_[n_ - 1] = 0;
+    first(n_);
+  }
+
+  ~kperm_gray()
+  {
+    // delete [] i_;
+    // delete [] d_;
+    // delete [] x_;
+    // delete [] ix_;
+  }
+
+  const std::size_t *data() const { return ix_; }
+  const std::size_t *invdata() const { return x_; }
+  void get_swap(std::size_t &s1, std::size_t &s2) const
+  {
+    s1 = sw1_;
+    s2 = sw2_;
+  }
+
+  const std::size_t *mr_digits() const { return d_; }
+
+  void first(std::size_t k)
+  {
+    k_ = k;
+    for (std::size_t j = 0; j < n_ - 1; ++j) d_[j] = 0;
+    for (std::size_t j = 0; j < n_ - 1; ++j) i_[j] = +1;
+    for (std::size_t j = 0; j < n_; ++j) x_[j] = ix_[j] = j;
+    sw1_ = n_ - 1;
+    sw2_ = n_ - 2;
+  }
+
+  //    void last(std::size_t k)
+  //    {
+  //        first(k);  // k_ is set with call
+  //        d_[n_-2] = 1;
+  //        swap2(x_[n_-1], x_[n_-2]);
+  //        swap2(ix_[n_-1], ix_[n_-2]);
+  //        for (std::size_t j=0; j<n_-2; ++j)  i_[j] = -1UL;
+  //        i_[n_-2] = +1;
+  //    }
+
+ private:
+  void swap(std::size_t j, std::size_t im)
+  {
+    const std::size_t x1 = j;        // element j
+    const std::size_t i1 = ix_[x1];  // position of j
+    const std::size_t i2 = i1 + im;  // neighbor
+    const std::size_t x2 = x_[i2];   // position of neighbor
+    x_[i1] = x2;
+    x_[i2] = x1;  // swap2(x_[i1], x_[i2]);
+    ix_[x1] = i2;
+    ix_[x2] = i1;  // swap2(ix_[x1], ix_[x2]);
+    sw1_ = i1;
+    sw2_ = i2;
+  }
+
+ public:
+  bool next()
+  {
+    std::size_t j = 0;
+    std::size_t m1 = n_ - 1;  // nine in falling factorial base
+    std::size_t ij;
+    while ((ij = i_[j])) {
+      std::size_t im = i_[j];
+      std::size_t dj = d_[j] + im;
+      if (dj > m1)  // =^= if ( (dj>m1) || ((long)dj<0) )
+      {
+        i_[j] = -ij;
+      } else {
+        d_[j] = dj;
+        swap(j, im);
+        return true;
+      }
+
+      --m1;
+      ++j;
+      if (j >= k_) return false;
+    }
+    return false;
+  }
+
+  //    bool prev()
+  //    {
+  //        std::size_t j = 0;
+  //        std::size_t m1 = n_ - 1;
+  //        std::size_t ij;
+  //        while ( (ij=i_[j]) )
+  //        {
+  //            std::size_t im = -i_[j];
+  //            std::size_t dj = d_[j] + im;
+  //            if ( dj>m1 )  // =^= if ( (dj>m1) || ((long)dj<0) )
+  //            {
+  //                i_[j] = -ij;
+  //            }
+  //            else
+  //            {
+  //                d_[j] = dj;
+  //                swap(j, im);
+  //                return true;
+  //            }
+  //
+  //            --m1;
+  //            ++j;
+  //            if ( j>=k_ )  return false;
+  //        }
+  //        return false;
+  //    }
+};
+// -------------------------
+
+#endif  // !defined HAVE_KPERM_GRAY_H__
diff --git a/include/permanent/opt.h b/include/permanent/opt.h
new file mode 100644
index 0000000..00d42b0
--- /dev/null
+++ b/include/permanent/opt.h
@@ -0,0 +1,50 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_opt_h_)
+#define permanent_opt_h_
+
+#include <permanent/combinatoric.h>
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/glynn.h>
+#include <permanent/ryser.h>
+
+#if defined(_PERMANENT_DEFAULT_TUNING)
+#include <permanent/tuning.default.h>
+#else
+#include <permanent/tuning.h>
+#endif
+
+namespace permanent {
+
+template <typename T, typename I = void>
+result_t<T, I> opt_square(const size_t m, const size_t n, const T *ptr)
+{
+  if (n <= PARAM_4<T, I>) {
+    return ryser_square<T, I>(m, n, ptr);
+  } else if (n * PARAM_1<T, I> + PARAM_2<T, I> + PARAM_3<T, I> > 0) {
+    return combinatoric_square<T, I>(m, n, ptr);
+  } else {
+    return glynn_square<T, I>(m, n, ptr);
+  }
+}
+
+template <typename T, typename I = void>
+result_t<T, I> opt_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  if (n * PARAM_1<T, I> + PARAM_2<T, I> * m / n + PARAM_3<T, I> > 0) {
+    return combinatoric_rectangular<T, I>(m, n, ptr);
+  } else {
+    return glynn_rectangular<T, I>(m, n, ptr);
+  }
+}
+
+template <typename T, typename I = void>
+result_t<T, I> opt(const size_t m, const size_t n, const T *ptr)
+{
+  return (m == n) ? opt_square<T, I>(m, n, ptr) : opt_rectangular<T, I>(m, n, ptr);
+}
+
+}  // namespace permanent
+
+#endif  // permanent_opt_h_
diff --git a/include/permanent/perm-mv0.h b/include/permanent/perm-mv0.h
new file mode 100644
index 0000000..8cffec1
--- /dev/null
+++ b/include/permanent/perm-mv0.h
@@ -0,0 +1,120 @@
+#if !defined HAVE_PERM_MV0_H__
+#define HAVE_PERM_MV0_H__
+// This file is part of the FXT library.
+// Copyright (C) 2010, 2011, 2012, 2014, 2019, 2023 Joerg Arndt
+// License: GNU General Public License version 3 or later,
+// see the file COPYING.txt in the main directory.
+
+#include <cstdlib>
+
+#include "swap.h"
+
+// define to update d[0] with each step:
+#define PERM_MV0_UPDATE_D0  // default is on
+
+// whether to use arrays instead of pointers:
+#define PERM_MV0_FIXARRAYS  // small speedup; default off
+
+class perm_mv0
+// Inverse permutations corresponding to falling factorial numbers.
+// CAT algorithm based on mixed radix Gray code
+//   for the factorial number system (falling base).
+{
+ public:
+#if !defined PERM_MV0_FIXARRAYS
+  std::size_t *d_;  // mixed radix digits with radix = [n-1, n-2, n-3, ..., 2]
+  std::size_t *x_;  // permutation
+#else
+  std::size_t d_[64];
+  std::size_t x_[64];
+#endif
+  std::size_t ect_;  // counter for easy case
+  std::size_t n_;    // permutations of n elements
+
+  perm_mv0(const perm_mv0 &) = delete;
+  perm_mv0 &operator=(const perm_mv0 &) = delete;
+
+ public:
+  explicit perm_mv0(std::size_t n)
+  // Must have n>=2
+  {
+    n_ = n;
+#if !defined PERM_MV0_FIXARRAYS
+    d_ = new std::size_t[n_];
+    x_ = new std::size_t[n_];
+#endif
+    d_[n - 1] = 1;  // sentinel (must be nonzero)
+    first();
+  }
+
+  ~perm_mv0()
+  {
+#if !defined PERM_MV0_FIXARRAYS
+    delete[] d_;
+    delete[] x_;
+#endif
+  }
+
+  const std::size_t *data() const { return x_; }
+
+  void first()
+  {
+    for (std::size_t k = 0; k < n_; ++k) x_[k] = k;
+    for (std::size_t k = 0; k < n_ - 1; ++k) d_[k] = 0;
+    ect_ = 0;
+  }
+
+  bool next()
+  {
+    if (++ect_ < n_)  // easy case
+    {
+#if defined PERM_MV0_UPDATE_D0
+      d_[0] = ect_;
+#endif
+      swap2(x_[ect_], x_[ect_ - 1]);
+      return true;
+    } else {
+      ect_ = 0;
+#if defined PERM_MV0_UPDATE_D0
+      d_[0] = ect_;
+#endif
+
+      std::size_t j = 1;
+      std::size_t m1 = n_ - 2;  // nine in falling factorial base
+      while (d_[j] == m1)       // find digit to increment
+      {
+        d_[j] = 0;
+        --m1;
+        ++j;
+      }
+
+      if (j == n_ - 1) return false;  // current permutation is last
+
+      const std::size_t dj = d_[j];
+      d_[j] = dj + 1;
+
+      // element at d[j] moves one position to the right:
+      swap2(x_[dj], x_[dj + 1]);
+
+      {  // move n-j elements to end:
+        std::size_t s = n_ - j, d = n_;
+        do {
+          --s;
+          --d;
+          x_[d] = x_[s];
+        } while (s);
+      }
+
+      // fill in 0,1,2,..,j-1 at start:
+      for (std::size_t k = 0; k < j; ++k) x_[k] = k;
+
+      return true;
+    }
+  }
+};
+// -------------------------
+
+// #undef PERM_MV0_UPDATE_D0
+// #undef PERM_MV0_FIXARRAYS
+
+#endif  // !defined HAVE_PERM_MV0_H__
diff --git a/include/permanent/permanent.h b/include/permanent/permanent.h
new file mode 100644
index 0000000..8ceca4c
--- /dev/null
+++ b/include/permanent/permanent.h
@@ -0,0 +1,493 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_permanent_h_)
+#define permanent_permanent_h_
+
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/kperm-gray.h>
+#include <permanent/perm-mv0.h>
+#include <permanent/tables.h>
+
+#include <bit>
+
+#if defined(_PERMANENT_DEFAULT_TUNING)
+#include <permanent/tuning.default.h>
+#else
+#include <permanent/tuning.h>
+#endif
+
+namespace permanent {
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  perm_mv0 permutations(m);
+
+  result_t<T, I> out = 0;
+  const size_t *perm = permutations.data();
+  do {
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= ptr[i * m + perm[i]];
+    }
+    out += prod;
+  } while (permutations.next());
+
+  return out;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  kperm_gray permutations(n);
+  permutations.first(m);
+
+  result_t<T, I> out = 0;
+  const size_t *perm = permutations.data();
+  do {
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= ptr[i * n + perm[i]];
+    }
+    out += prod;
+  } while (permutations.next());
+
+  return out;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != m; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j != m; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i != m; ++i) {
+      sum += ptr[i * m + j];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = m - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    for (size_t i = 0; i != m; ++i) {
+      vec[i] += ptr[idx * m + i] * delta[idx] * 2;
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != m; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return out / (1UL << bound);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != n; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = n * (n - m);
+  for (size_t j = 0; j != m; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = 0;
+    for (size_t i = 0; i != n; ++i) {
+      sum += ptr[j * n + i];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+  for (size_t j = m; j != n; ++j) {
+    vec[j] = n;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = n - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    for (size_t i = 0; i != m; ++i) {
+      vec[i] += ptr[i * n + idx] * delta[idx] * 2;
+    }
+    for (size_t i = m; i != n; ++i) {
+      vec[i] += delta[idx] * 2;
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != n; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return (out / (1UL << bound)) / FACTORIAL(n - m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn_rectangular2(const size_t m, const size_t n, const T *ptr)
+{
+  // Fill delta array ([+1...+1]) and permutation array ([0...m])
+  size_t perm[64 + 1];
+  sgn_t delta[64];
+  for (size_t i = 0; i != n; ++i) {
+    perm[i] = i;
+    delta[i] = 1;
+  }
+
+  // Compute first permutation
+  result_t<T, I> vec[64];
+  result_t<T, I> out = 1;
+  for (size_t j = 0; j != n; ++j) {
+    // Compute inner terms
+    result_t<T, I> sum = n - m;
+    for (size_t i = 0; i != m; ++i) {
+      sum += ptr[i * n + j];
+    }
+    vec[j] = sum;
+
+    // Compute first outer term
+    out *= sum;
+  }
+
+  // Iterate from the second to the final permutation
+  size_t bound = n - 1;
+  size_t pos = 0;
+  sgn_t sign = -1;
+  while (pos != bound) {
+    // Update delta
+    size_t idx = bound - pos;
+    delta[idx] *= -1;
+
+    // Update inner terms
+    if (idx < m) {
+      for (size_t i = 0; i != n; ++i) {
+        vec[i] += ptr[idx * n + i] * delta[idx] * 2;
+      }
+    } else {
+      for (size_t i = 0; i != n; ++i) {
+        vec[i] += delta[idx] * 2;
+      }
+    }
+
+    // Add product of inner terms to outer term
+    result_t<T, I> prod = 1;
+    for (size_t i = 0; i != n; ++i) {
+      prod *= vec[i];
+    }
+    out += sign * prod;
+
+    // Go to next permutation
+    perm[0] = 0;
+    perm[pos] = perm[pos + 1];
+    ++pos;
+    perm[pos] = pos;
+    pos = perm[0];
+    sign *= -1;
+  }
+
+  // Divide outer term by external factor and return permanent
+  return (out / (1UL << bound)) / FACTORIAL(n - m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> ryser_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  size_t i, j;
+  size_t k;
+  result_t<T, I> rowsum;
+  result_t<T, I> out = 0;
+
+  /* Iterate over c = pow(2, m) submatrices (equal to (1 << m)) submatrices.
+   */
+
+  size_t c = 1UL << m;
+
+  /* Loop over columns of submatrix; compute product of row sums. */
+
+  for (k = 0; k < c; ++k) {
+    result_t<T, I> rowsumprod = 1.0;
+    for (i = 0; i < m; ++i) {
+      /* Loop over rows of submatrix; compute row sum. */
+
+      rowsum = 0.0;
+      for (j = 0; j < m; ++j) {
+        /* Add element to row sum if the row index is in the
+         * characteristic * vector of the submatrix, which is the binary
+         * vector given by k.  */
+
+        if (k & (1UL << j)) {
+          rowsum += ptr[m * i + j];
+        }
+      }
+
+      /* Update product of row sums. */
+
+      rowsumprod *= rowsum;
+    }
+
+    /* Add term multiplied by the parity of the characteristic vector. */
+
+    out += rowsumprod * parity(std::popcount(k));
+  }
+
+  /* Return answer with the correct sign (times -1 for odd m). */
+
+  return out * parity(m);
+}
+
+template <typename T>
+void swap2(T *perm, T i, T j)
+{
+  const T temp = perm[i];
+  perm[i] = perm[j];
+  perm[j] = temp;
+}
+
+template <typename T>
+void init_perm(const T N, T *const the_fact_set, T *const the_perm_set, T *const the_inv_set)
+{
+  for (T i = 0; i < N; i++) {
+    the_fact_set[i + 1] = 0;
+    the_perm_set[i] = i;
+    the_inv_set[i] = i;
+  }
+}
+
+template <typename T>
+bool gen_next_perm(T *const falling_fact, T *const perm, T *const inv_perm, const T cols,
+                   const T u_)
+{
+  /* Use the current permutation to check for next permutation
+   * lexicographically, if it exists update the curr_array by swapping the
+   * leftmost changed element (at position i < k). Replace the elements up to
+   * position k by the smallest element lying to the right of i. Do not put
+   * remaining k, .., n - 1 positions in ascending order to improve efficiency
+   * has time complexity O(n) in the worst case. */
+  T i = u_ - 1;
+  T m1 = cols - i - 1;
+
+  /* Begin update of falling_fact - recall falling_fact[0] = 0, so increment
+   * index accordingly. If i becomes negative during the check, you are
+   * pointing to the sentinel value, so you are done generating
+   * permutations. */
+  while (falling_fact[i + 1] == m1) {
+    falling_fact[i + 1] = 0;
+    ++m1;
+    --i;
+  }
+  if (i == 0UL - 1) {
+    return false;
+  }
+  ++falling_fact[i + 1];
+
+  /* Find smallest element perm[i] < perm[j] that lies to the right of
+   * pos i, and then update the state of the permutation using its inverse
+   * to generate next. */
+  T z = perm[i];
+  do {
+    ++z;
+  } while (inv_perm[z] <= i);
+  const T j = inv_perm[z];
+  swap2(perm, i, j);
+  swap2(inv_perm, perm[i], perm[j]);
+  ++i;
+  T y = 0;
+
+  /* Find the smallest elements to the right of position i. */
+  while (i < u_) {
+    while (inv_perm[y] < i) {
+      ++y;
+    }
+    const T k = inv_perm[y];
+    swap2(perm, i, k);
+    swap2(inv_perm, perm[i], perm[k]);
+    ++i;
+  }
+  return true;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> ryser_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  size_t falling_fact[128];
+  size_t perm[128];
+  size_t inv_perm[128];
+  falling_fact[0] = 0;
+
+  size_t i, j, k;
+
+  int value_sign = 1;
+
+  result_t<T, I> sum_of_matrix_vals;
+  result_t<T, I> sum_over_k_vals = 0.0;
+  result_t<T, I> vec[64];
+
+  /* Dealing with a rectangle. Can't use bit hacking trick here. */
+  for (k = 0; k < m; ++k) {
+    /* Store the binomial coefficient for this k value bin_c. */
+    size_t counter = 0;  // Count how many permutations you have generated
+    size_t bin_c = BINOMIAL((n - m + k), k);
+    result_t<T, I> prod_of_cols = 1;
+    result_t<T, I> result = 0;
+
+    /* (Re)initialize the set to permute for this k value. */
+    init_perm(n, falling_fact, perm, inv_perm);
+
+    /* sort up to position u + 1 where u = min(m - k, n - 1). */
+    size_t sort_up_to = n - 1;
+
+    if ((m - k) < sort_up_to) {
+      sort_up_to = (m - k);
+    }
+
+    for (i = 0; i < m; ++i) {
+      sum_of_matrix_vals = 0.0;
+      for (j = 0; j < sort_up_to; ++j) {
+        sum_of_matrix_vals += ptr[i * n + perm[j]];
+      }
+      vec[i] = sum_of_matrix_vals;
+    }
+    for (i = 0; i < m; ++i) {
+      prod_of_cols *= vec[i];
+    }
+
+    result += value_sign * bin_c * prod_of_cols;
+    counter += 1;
+
+    /* Iterate over second to last permutations of the set. */
+    while (gen_next_perm(falling_fact, perm, inv_perm, n, sort_up_to)) {
+      for (i = 0; i < m; ++i) {
+        sum_of_matrix_vals = 0.0;
+        for (j = 0; j < m - k; ++j) {
+          sum_of_matrix_vals += ptr[i * n + perm[j]];
+        }
+        vec[i] = sum_of_matrix_vals;
+      }
+      prod_of_cols = 1.0;
+      for (i = 0; i < m; ++i) {
+        prod_of_cols *= vec[i];
+      }
+
+      result += value_sign * bin_c * prod_of_cols;
+      counter += 1;
+    }
+    sum_over_k_vals += (result / counter) * BINOMIAL(n, (m - k));
+    value_sign *= -1;
+  }
+  return sum_over_k_vals;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> opt_square(const size_t m, const size_t n, const T *ptr)
+{
+  if (n <= PARAM_4) {
+    return ryser_square<T, I>(m, n, ptr);
+  } else if (n * PARAM_1 + PARAM_2 * m / n + PARAM_3 > 0) {
+    return combinatoric_square<T, I>(m, n, ptr);
+  } else {
+    return glynn_square<T, I>(m, n, ptr);
+  }
+}
+
+template <typename T, typename I = void>
+result_t<T, I> opt_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  if (n * PARAM_1 + PARAM_2 * m / n + PARAM_3 > 0) {
+    return combinatoric_rectangular<T, I>(m, n, ptr);
+  } else {
+    return glynn_rectangular<T, I>(m, n, ptr);
+  }
+}
+
+template <typename T, typename I = void>
+result_t<T, I> opt(const size_t m, const size_t n, const T *ptr)
+{
+    return (m == n) ? opt_square<T, I>(m, n, ptr) : opt_rectangular<T, I>(m, n, ptr);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> combinatoric(const size_t m, const size_t n, const T *ptr)
+{
+    return (m == n) ? combinatoric_square<T, I>(m, n, ptr) : combinatoric_rectangular<T, I>(m, n, ptr);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> ryser(const size_t m, const size_t n, const T *ptr)
+{
+    return (m == n) ? ryser_square<T, I>(m, n, ptr) : ryser_rectangular<T, I>(m, n, ptr);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> glynn(const size_t m, const size_t n, const T *ptr)
+{
+    return (m == n) ? glynn_square<T, I>(m, n, ptr) : glynn_rectangular<T, I>(m, n, ptr);
+}
+
+}  // namespace permanent
+
+#endif  // permanent_permanent_h_
diff --git a/include/permanent/ryser.h b/include/permanent/ryser.h
new file mode 100644
index 0000000..aa03ab8
--- /dev/null
+++ b/include/permanent/ryser.h
@@ -0,0 +1,222 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_ryser_h_)
+#define permanent_ryser_h_
+
+#include <permanent/common.h>
+#include <permanent/complex.h>
+#include <permanent/tables.h>
+
+#include <bit>
+
+namespace {
+
+template <typename T>
+auto parity(const T &x)
+{
+  return 1 - ((x & 1) << 1);
+};
+
+template <typename T>
+void swap2(T *perm, T i, T j)
+{
+  const T temp = perm[i];
+  perm[i] = perm[j];
+  perm[j] = temp;
+}
+
+template <typename T>
+void init_perm(const T N, T *const the_fact_set, T *const the_perm_set, T *const the_inv_set)
+{
+  for (T i = 0; i < N; i++) {
+    the_fact_set[i + 1] = 0;
+    the_perm_set[i] = i;
+    the_inv_set[i] = i;
+  }
+}
+
+template <typename T>
+bool gen_next_perm(T *const falling_fact, T *const perm, T *const inv_perm, const T cols,
+                   const T u_)
+{
+  /* Use the current permutation to check for next permutation
+   * lexicographically, if it exists update the curr_array by swapping the
+   * leftmost changed element (at position i < k). Replace the elements up to
+   * position k by the smallest element lying to the right of i. Do not put
+   * remaining k, .., n - 1 positions in ascending order to improve efficiency
+   * has time complexity O(n) in the worst case. */
+  T i = u_ - 1;
+  T m1 = cols - i - 1;
+
+  /* Begin update of falling_fact - recall falling_fact[0] = 0, so increment
+   * index accordingly. If i becomes negative during the check, you are
+   * pointing to the sentinel value, so you are done generating
+   * permutations. */
+  while (falling_fact[i + 1] == m1) {
+    falling_fact[i + 1] = 0;
+    ++m1;
+    --i;
+  }
+  if (i == 0UL - 1) {
+    return false;
+  }
+  ++falling_fact[i + 1];
+
+  /* Find smallest element perm[i] < perm[j] that lies to the right of
+   * pos i, and then update the state of the permutation using its inverse
+   * to generate next. */
+  T z = perm[i];
+  do {
+    ++z;
+  } while (inv_perm[z] <= i);
+  const T j = inv_perm[z];
+  swap2(perm, i, j);
+  swap2(inv_perm, perm[i], perm[j]);
+  ++i;
+  T y = 0;
+
+  /* Find the smallest elements to the right of position i. */
+  while (i < u_) {
+    while (inv_perm[y] < i) {
+      ++y;
+    }
+    const T k = inv_perm[y];
+    swap2(perm, i, k);
+    swap2(inv_perm, perm[i], perm[k]);
+    ++i;
+  }
+  return true;
+}
+
+}  // namespace
+
+namespace permanent {
+
+template <typename T, typename I = void>
+result_t<T, I> ryser_square(const size_t m, const size_t n, const T *ptr)
+{
+  (void)n;
+
+  size_t i, j;
+  size_t k;
+  result_t<T, I> rowsum;
+  result_t<T, I> out = 0;
+
+  /* Iterate over c = pow(2, m) submatrices (equal to (1 << m)) submatrices.
+   */
+
+  size_t c = 1UL << m;
+
+  /* Loop over columns of submatrix; compute product of row sums. */
+
+  for (k = 0; k < c; ++k) {
+    result_t<T, I> rowsumprod = 1.0;
+    for (i = 0; i < m; ++i) {
+      /* Loop over rows of submatrix; compute row sum. */
+
+      rowsum = 0.0;
+      for (j = 0; j < m; ++j) {
+        /* Add element to row sum if the row index is in the
+         * characteristic * vector of the submatrix, which is the binary
+         * vector given by k.  */
+
+        if (k & (1UL << j)) {
+          rowsum += ptr[m * i + j];
+        }
+      }
+
+      /* Update product of row sums. */
+
+      rowsumprod *= rowsum;
+    }
+
+    /* Add term multiplied by the parity of the characteristic vector. */
+
+    out += rowsumprod * parity(std::popcount(k));
+  }
+
+  /* Return answer with the correct sign (times -1 for odd m). */
+
+  return out * parity(m);
+}
+
+template <typename T, typename I = void>
+result_t<T, I> ryser_rectangular(const size_t m, const size_t n, const T *ptr)
+{
+  size_t falling_fact[128];
+  size_t perm[128];
+  size_t inv_perm[128];
+  falling_fact[0] = 0;
+
+  size_t i, j, k;
+
+  int value_sign = 1;
+
+  result_t<T, I> sum_of_matrix_vals;
+  result_t<T, I> sum_over_k_vals = 0.0;
+  result_t<T, I> vec[64];
+
+  /* Dealing with a rectangle. Can't use bit hacking trick here. */
+  for (k = 0; k < m; ++k) {
+    /* Store the binomial coefficient for this k value bin_c. */
+    size_t counter = 0;  // Count how many permutations you have generated
+    size_t bin_c = binomial((n - m + k), k);
+    result_t<T, I> prod_of_cols = 1;
+    result_t<T, I> result = 0;
+
+    /* (Re)initialize the set to permute for this k value. */
+    init_perm(n, falling_fact, perm, inv_perm);
+
+    /* sort up to position u + 1 where u = min(m - k, n - 1). */
+    size_t sort_up_to = n - 1;
+
+    if ((m - k) < sort_up_to) {
+      sort_up_to = (m - k);
+    }
+
+    for (i = 0; i < m; ++i) {
+      sum_of_matrix_vals = 0.0;
+      for (j = 0; j < sort_up_to; ++j) {
+        sum_of_matrix_vals += ptr[i * n + perm[j]];
+      }
+      vec[i] = sum_of_matrix_vals;
+    }
+    for (i = 0; i < m; ++i) {
+      prod_of_cols *= vec[i];
+    }
+
+    result += value_sign * bin_c * prod_of_cols;
+    counter += 1;
+
+    /* Iterate over second to last permutations of the set. */
+    while (gen_next_perm(falling_fact, perm, inv_perm, n, sort_up_to)) {
+      for (i = 0; i < m; ++i) {
+        sum_of_matrix_vals = 0.0;
+        for (j = 0; j < m - k; ++j) {
+          sum_of_matrix_vals += ptr[i * n + perm[j]];
+        }
+        vec[i] = sum_of_matrix_vals;
+      }
+      prod_of_cols = 1.0;
+      for (i = 0; i < m; ++i) {
+        prod_of_cols *= vec[i];
+      }
+
+      result += value_sign * bin_c * prod_of_cols;
+      counter += 1;
+    }
+    sum_over_k_vals += (result / counter) * binomial(n, (m - k));
+    value_sign *= -1;
+  }
+  return sum_over_k_vals;
+}
+
+template <typename T, typename I = void>
+result_t<T, I> ryser(const size_t m, const size_t n, const T *ptr)
+{
+  return (m == n) ? ryser_square<T, I>(m, n, ptr) : ryser_rectangular<T, I>(m, n, ptr);
+}
+
+}  // namespace permanent
+
+#endif  // permanent_ryser_h_
diff --git a/src/swap.h b/include/permanent/swap.h
similarity index 62%
rename from src/swap.h
rename to include/permanent/swap.h
index 529844d..f08cc62 100644
--- a/src/swap.h
+++ b/include/permanent/swap.h
@@ -1,20 +1,25 @@
-#if !defined  HAVE_SWAP_H__
-#define       HAVE_SWAP_H__
+#if !defined HAVE_SWAP_H__
+#define HAVE_SWAP_H__
 // This file is part of the FXT library.
 // Copyright (C) 2010 Joerg Arndt
 // License: GNU General Public License version 3 or later,
 // see the file COPYING.txt in the main directory.
 
-
 template <typename Type>
-static inline void  swap2(Type &x, Type &y)
+static inline void swap2(Type &x, Type &y)
 // swap values
-{ Type t(x);  x = y;  y = t; }
+{
+  Type t(x);
+  x = y;
+  y = t;
+}
 
 template <typename Type>
-static inline void  swap0(Type &x, Type &y)
+static inline void swap0(Type &x, Type &y)
 // swap() for y known to be zero
-{ y = x;  x = 0; }
-
+{
+  y = x;
+  x = 0;
+}
 
 #endif  // !defined HAVE_SWAP_H__
diff --git a/src/tables.h b/include/permanent/tables.h
similarity index 95%
rename from src/tables.h
rename to include/permanent/tables.h
index d779861..6a7bd0e 100644
--- a/src/tables.h
+++ b/include/permanent/tables.h
@@ -1,15 +1,16 @@
-/* Copyright 2034 QC-Devs (GPLv3) */
+/* Copyright 2024 QC-Devs (GPLv3) */
 
-#ifndef TABLES_H_
-#define TABLES_H_
+#if !defined(permanent_tables_h_)
+#define permanent_tables_h_
 
-#include <cstdlib>
+#include <permanent/common.h>
 
-#define BINOMIAL(N, K) binom_table[65 * N + K]
+#include <cstdint>
+#include <type_traits>
 
-#define FACTORIAL(N) factorial_table[N]
+namespace permanent {
 
-static const std::size_t binom_table[] = {
+static constexpr size_t binom_table[] = {
     1,
     0,
     0,
@@ -4237,29 +4238,67 @@ static const std::size_t binom_table[] = {
     1,
 };
 
-static const double factorial_table[] = {
-    1.0000000000000000e+00, 1.0000000000000000e+00, 2.0000000000000000e+00,
-    6.0000000000000000e+00, 2.4000000000000000e+01, 1.2000000000000000e+02,
-    7.2000000000000000e+02, 5.0400000000000000e+03, 4.0320000000000000e+04,
-    3.6288000000000000e+05, 3.6288000000000000e+06, 3.9916800000000000e+07,
-    4.7900160000000000e+08, 6.2270208000000000e+09, 8.7178291200000000e+10,
-    1.3076743680000000e+12, 2.0922789888000000e+13, 3.5568742809600000e+14,
-    6.4023737057280000e+15, 1.2164510040883200e+17, 2.4329020081766400e+18,
-    5.1090942171709440e+19, 1.1240007277776077e+21, 2.5852016738884978e+22,
-    6.2044840173323941e+23, 1.5511210043330984e+25, 4.0329146112660572e+26,
-    1.0888869450418352e+28, 3.0488834461171380e+29, 8.8417619937397008e+30,
-    2.6525285981219110e+32, 8.2228386541779236e+33, 2.6313083693369355e+35,
-    8.6833176188118871e+36, 2.9523279903960420e+38, 1.0333147966386147e+40,
-    3.7199332678990118e+41, 1.3763753091226346e+43, 5.2302261746660112e+44,
-    2.0397882081197442e+46, 8.1591528324789801e+47, 3.3452526613163818e+49,
-    1.4050061177528803e+51, 6.0415263063373834e+52, 2.6582715747884485e+54,
-    1.1962222086548019e+56, 5.5026221598120899e+57, 2.5862324151116818e+59,
-    1.2413915592536073e+61, 6.0828186403426752e+62, 3.0414093201713381e+64,
-    1.5511187532873824e+66, 8.0658175170943877e+67, 4.2748832840600255e+69,
-    2.3084369733924138e+71, 1.2696403353658278e+73, 7.1099858780486358e+74,
-    4.0526919504877214e+76, 2.3505613312828785e+78, 1.3868311854568984e+80,
-    8.3209871127413899e+81, 5.0758021387722484e+83, 3.1469973260387939e+85,
-    1.9826083154044403e+87, 1.2688693218588415e+89,
+static constexpr double double_factorial_table[] = {
+    1.0000000000000000e+00, 1.0000000000000000e+00, 2.0000000000000000e+00, 6.0000000000000000e+00,
+    2.4000000000000000e+01, 1.2000000000000000e+02, 7.2000000000000000e+02, 5.0400000000000000e+03,
+    4.0320000000000000e+04, 3.6288000000000000e+05, 3.6288000000000000e+06, 3.9916800000000000e+07,
+    4.7900160000000000e+08, 6.2270208000000000e+09, 8.7178291200000000e+10, 1.3076743680000000e+12,
+    2.0922789888000000e+13, 3.5568742809600000e+14, 6.4023737057280000e+15, 1.2164510040883200e+17,
+    2.4329020081766400e+18, 5.1090942171709440e+19, 1.1240007277776077e+21, 2.5852016738884978e+22,
+    6.2044840173323941e+23, 1.5511210043330984e+25, 4.0329146112660572e+26, 1.0888869450418352e+28,
+    3.0488834461171380e+29, 8.8417619937397008e+30, 2.6525285981219110e+32, 8.2228386541779236e+33,
+    2.6313083693369355e+35, 8.6833176188118871e+36, 2.9523279903960420e+38, 1.0333147966386147e+40,
+    3.7199332678990118e+41, 1.3763753091226346e+43, 5.2302261746660112e+44, 2.0397882081197442e+46,
+    8.1591528324789801e+47, 3.3452526613163818e+49, 1.4050061177528803e+51, 6.0415263063373834e+52,
+    2.6582715747884485e+54, 1.1962222086548019e+56, 5.5026221598120899e+57, 2.5862324151116818e+59,
+    1.2413915592536073e+61, 6.0828186403426752e+62, 3.0414093201713381e+64, 1.5511187532873824e+66,
+    8.0658175170943877e+67, 4.2748832840600255e+69, 2.3084369733924138e+71, 1.2696403353658278e+73,
+    7.1099858780486358e+74, 4.0526919504877214e+76, 2.3505613312828785e+78, 1.3868311854568984e+80,
+    8.3209871127413899e+81, 5.0758021387722484e+83, 3.1469973260387939e+85, 1.9826083154044403e+87,
+    1.2688693218588415e+89,
 };
 
-#endif  // TABLES_H_
+static constexpr std::uint64_t int_factorial_table[] = {
+    1,
+    1,
+    2,
+    6,
+    24,
+    120,
+    720,
+    5040,
+    40320,
+    362880,
+    3628800,
+    39916800,
+    479001600,
+    6227020800,
+    87178291200,
+    1307674368000,
+    20922789888000,
+    355687428096000,
+    6402373705728000,
+    121645100408832000,
+    2432902008176640000,
+};
+
+inline size_t binomial(const size_t n, const size_t k)
+{
+  return permanent::binom_table[65 * n + k];
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_integral_v<T>, std::uint64_t> factorial(const size_t n)
+{
+  return permanent::int_factorial_table[n];
+}
+
+template <typename T>
+inline std::enable_if_t<!std::is_integral_v<T>, double> factorial(const size_t n)
+{
+  return permanent::double_factorial_table[n];
+}
+
+}  // namespace permanent
+
+#endif  // permanent_tables_h_
diff --git a/include/permanent/tuning.default.h b/include/permanent/tuning.default.h
new file mode 100644
index 0000000..a73d11b
--- /dev/null
+++ b/include/permanent/tuning.default.h
@@ -0,0 +1,31 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_tuning_h_)
+#define permanent_tuning_h_
+
+namespace permanent {
+
+template <typename Type, typename IntType = void>
+struct _tuning_params_t
+{
+  static constexpr double PARAM_1 = -5.72098e-01;
+  static constexpr double PARAM_2 = -2.20142e+01;
+  static constexpr double PARAM_3 = +1.52979e+01;
+  static constexpr double PARAM_4 = +3.00000e+00;
+};
+
+template <typename Type, typename IntType = void>
+static constexpr double PARAM_1 = _tuning_params_t<Type, IntType>::PARAM_1;
+
+template <typename Type, typename IntType = void>
+static constexpr double PARAM_2 = _tuning_params_t<Type, IntType>::PARAM_2;
+
+template <typename Type, typename IntType = void>
+static constexpr double PARAM_3 = _tuning_params_t<Type, IntType>::PARAM_3;
+
+template <typename Type, typename IntType = void>
+static constexpr double PARAM_4 = _tuning_params_t<Type, IntType>::PARAM_4;
+
+}  // namespace permanent
+
+#endif  // permanent_tuning_h_
diff --git a/permanent/__init__.py b/permanent/__init__.py
deleted file mode 100644
index c0a21fd..0000000
--- a/permanent/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-r"""Module for computing the permanents of matrices."""
-
-__all__ = [
-    "opt",
-    "combinatoric",
-    "glynn",
-    "ryser",
-]
-
-
-from .permanent import opt, combinatoric, glynn, ryser
diff --git a/permanent/test/__init__.py b/permanent/test/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/permanent/test/test_permanent.py b/permanent/test/test_permanent.py
deleted file mode 100644
index dcc5bb4..0000000
--- a/permanent/test/test_permanent.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import numpy as np
-import math
-import permanent
-
-
-REL_ERROR = 0.0001
-
-
-# Test square matrices
-def test_2by2_comb():
-    matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
-    assert abs((permanent.combinatoric(matrix) - 10) / 10) <= REL_ERROR
-
-
-def test_2by2_glynn():
-    matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
-    assert abs((permanent.glynn(matrix) - 10) / 10) <= REL_ERROR
-
-
-def test_2by2_ryser():
-    matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
-    assert abs((permanent.ryser(matrix) - 10) / 10) <= REL_ERROR
-
-
-def test_3by3_comb():
-    matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
-    assert abs((permanent.combinatoric(matrix) - 450) / 450) <= REL_ERROR
-
-
-def test_3by3_glynn():
-    matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
-    assert abs((permanent.glynn(matrix) - 450) / 450) <= REL_ERROR
-
-
-def test_3by3_ryser():
-    matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
-    assert abs((permanent.ryser(matrix) - 450) / 450) <= REL_ERROR
-
-
-def test_4by4_comb():
-    matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
-    assert abs((permanent.combinatoric(matrix) - 55456) / 55456) <= REL_ERROR
-
-
-def test_4by4_glynn():
-    matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
-    assert abs((permanent.glynn(matrix) - 55456) / 55456) <= REL_ERROR
-
-
-def test_4by4_ryser():
-    matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
-    assert abs((permanent.ryser(matrix) - 55456) / 55456) <= REL_ERROR
-
-
-def test_7by7_comb():
-    matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
-    assert (
-        abs((permanent.combinatoric(matrix) - 5373548250000) / 5373548250000)
-        <= REL_ERROR
-    )
-
-
-def test_7by7_glynn():
-    matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
-    assert abs((permanent.glynn(matrix) - 5373548250000) / 5373548250000) <= REL_ERROR
-
-
-def test_7by7_ryser():
-    matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
-    assert abs((permanent.ryser(matrix) - 5373548250000) / 5373548250000) <= REL_ERROR
-
-
-## Test rectangular matrices
-def test_2by3_comb():
-    matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
-    assert abs((permanent.combinatoric(matrix) - 58) / 58) <= REL_ERROR
-
-
-def test_2by3_glynn():
-    matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
-    assert abs((permanent.glynn(matrix) - 58) / 58) <= REL_ERROR
-
-
-def test_2by3_ryser():
-    matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
-    assert abs((permanent.ryser(matrix) - 58) / 58) <= REL_ERROR
-
-
-def test_2by4_comb():
-    matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
-    assert abs((permanent.combinatoric(matrix) - 190) / 190) <= REL_ERROR
-
-
-def test_2by4_glynn():
-    matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
-    assert abs((permanent.glynn(matrix) - 190) / 190) <= REL_ERROR
-
-
-def test_2by4_ryser():
-    matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
-    assert abs((permanent.ryser(matrix) - 190) / 190) <= REL_ERROR
-
-
-def test_2by7_comb():
-    matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
-    assert abs((permanent.combinatoric(matrix) - 1820) / 1820) <= REL_ERROR
-
-
-def test_2by7_glynn():
-    matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
-    assert abs((permanent.glynn(matrix) - 1820) / 1820) <= REL_ERROR
-
-
-def test_2by7_ryser():
-    matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
-    assert abs((permanent.ryser(matrix) - 1820) / 1820) <= REL_ERROR
-
-
-def test_5by7_comb():
-    matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
-    assert abs((permanent.combinatoric(matrix) - 1521238320) / 1521238320) <= REL_ERROR
-
-
-def test_5by7_glynn():
-    matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
-    assert abs((permanent.glynn(matrix) - 1521238320) / 1521238320) <= REL_ERROR
-
-
-def test_5by7_ryser():
-    matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
-    assert abs((permanent.ryser(matrix) - 1521238320) / 1521238320) <= REL_ERROR
-
-
-def test_6by7_comb():
-    matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
-    assert (
-        abs((permanent.combinatoric(matrix) - 117681979920) / 117681979920) <= REL_ERROR
-    )
-
-
-def test_6by7_glynn():
-    matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
-    assert abs((permanent.glynn(matrix) - 117681979920) / 117681979920) <= REL_ERROR
-
-
-def test_6by7_ryser():
-    matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
-    assert abs((permanent.ryser(matrix) - 117681979920) / 117681979920) <= REL_ERROR
-
-
-def test_ones_comb():
-    matrix = np.ones((10, 10), dtype=np.double)
-    assert (
-        abs((permanent.combinatoric(matrix) - math.factorial(10)) / math.factorial(10))
-        <= REL_ERROR
-    )
-
-
-def test_ones_ryser():
-    matrix = np.ones((10, 10), dtype=np.double)
-    assert (
-        abs((permanent.ryser(matrix) - math.factorial(10)) / math.factorial(10))
-        <= REL_ERROR
-    )
-
-
-def test_ones_glynn():
-    matrix = np.ones((10, 10), dtype=np.double)
-    assert (
-        abs((permanent.glynn(matrix) - math.factorial(10)) / math.factorial(10))
-        <= REL_ERROR
-    )
-
-
-def test_ones_comb_big():
-    matrix = np.ones((12, 12), dtype=np.double)
-    assert (
-        abs((permanent.combinatoric(matrix) - math.factorial(12)) / math.factorial(12))
-        <= REL_ERROR
-    )
-
-
-def test_ones_ryser_big():
-    matrix = np.ones((12, 12), dtype=np.double)
-    assert (
-        abs((permanent.ryser(matrix) - math.factorial(12)) / math.factorial(12))
-        <= REL_ERROR
-    )
-
-
-def test_ones_glynn_big():
-    matrix = np.ones((12, 12), dtype=np.double)
-    assert (
-        abs((permanent.glynn(matrix) - math.factorial(12)) / math.factorial(12))
-        <= REL_ERROR
-    )
-
-
-def test_identity_comb():
-    matrix = np.identity(10, dtype=np.double)
-    assert abs((permanent.combinatoric(matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_ryser():
-    matrix = np.identity(10, dtype=np.double)
-    assert abs((permanent.ryser(matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_glynn():
-    matrix = np.identity(10, dtype=np.double)
-    assert abs((permanent.glynn(matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_ryser_odd():
-    matrix = np.identity(5, dtype=np.double)
-    assert abs((permanent.ryser(matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_glynn_odd():
-    matrix = np.identity(5, dtype=np.double)
-    assert abs((permanent.glynn(matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_comb_diag():
-    matrix = np.ones((3, 7), dtype=np.double)
-    diag_matrix = np.diag(np.diag(matrix))
-    assert abs((permanent.combinatoric(diag_matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_ryser_diag():
-    matrix = np.ones((3, 7), dtype=np.double)
-    diag_matrix = np.diag(np.diag(matrix))
-    assert abs((permanent.ryser(diag_matrix) - 1) / 1) <= REL_ERROR
-
-
-def test_identity_glynn_diag():
-    matrix = np.ones((3, 7), dtype=np.double)
-    diag_matrix = np.diag(np.diag(matrix))
-    assert abs((permanent.glynn(diag_matrix) - 1) / 1) <= REL_ERROR
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..09d5480
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,72 @@
+[build-system]
+requires = ["numpy>=2.0", "pandas", "scikit-learn", "scikit-build-core>=0.3.3"]
+build-backend = "scikit_build_core.build"
+
+[project]
+name = "permanent"
+version = "0.0.1"
+description = "Extension module for computing permanents of square and rectangular matrices."
+readme = "README.md"
+requires-python = ">=3.9"
+authors = [{name = "QC-Devs", email = "qcdevs@gmail.com"}]
+keywords = ["math", "linear algebra", "combinatorics", "permanent"]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  # "Development Status :: 5 - Production/Stable",
+  "Environment :: Console",
+  "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+  "Operating System :: POSIX :: Linux",
+  "Operating System :: MacOS",
+  "Operating System :: Microsoft :: Windows",
+  "Topic :: Scientific/Engineering :: Mathematics",
+  "Intended Audience :: Science/Research",
+  "Intended Audience :: Education",
+  "Intended Audience :: Developers",
+  "Natural Language :: English",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+]
+license = {file = "LICENSE"}
+urls = {home = "https://permanent.qcdevs.org/"}
+dependencies = ["numpy>=2.0"]
+
+[project.optional-dependencies]
+test = ["pytest"]
+
+[tool.scikit-build]
+wheel.expand-macos-universal-tags = true
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
+xfail_strict = true
+log_cli_level = "INFO"
+filterwarnings = [
+  "error",
+  "ignore::pytest.PytestCacheWarning",
+]
+testpaths = ["tests"]
+
+[tool.cibuildwheel]
+build-frontend = "build[uv]"
+test-command = "pytest tests"
+test-extras = ["test"]
+
+[tool.cibuildwheel.pyodide]
+environment.CFLAGS = "-fexceptions"
+environment.LDFLAGS = "-fexceptions"
+build-frontend = {name = "build", args = ["--exports", "whole_archive"]}
+
+[tool.ruff]
+target-version = "py311"
+
+[tool.ruff.lint]
+extend-select = [
+  "B",    # flake8-bugbear
+  "I",    # isort
+  "PGH",  # pygrep-hooks
+  "RUF",  # Ruff-specific
+  "UP",   # pyupgrade
+]
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 5899066..0000000
--- a/setup.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup
-
-
-name = "permanent"
-
-
-version = "0.1.0"
-
-
-licence = "GPLv3"
-
-
-author = "QC-Devs"
-
-
-author_email = "email@address.com"
-
-
-url = "https://permanent.qcdevs.org/"
-
-
-description = "Functions to compute the permanent of arbitrary matrices."
-
-
-long_description = open("README.md", "r", encoding="utf-8").read()
-
-
-classifiers = [
-    "Environment :: Console",
-    "Intended Audience :: Science/Research",
-    "Programming Language :: Python :: 3",
-    # "Topic :: Science/Engineering :: Molecular Science",
-    # Figure out what to put here before publishing the package
-]
-
-
-install_requires = [
-    "numpy>=1.13",
-]
-
-
-extras_require = {
-    "test": ["pytest"],
-    "docs": ["sphinx", "sphinx_rtd_theme"],
-}
-
-
-packages = [
-    "permanent",
-    "permanent.test",
-]
-
-
-package_data = {
-    "permanent": ["permanent.so"],
-}
-
-
-if __name__ == "__main__":
-    setup(
-        name=name,
-        version=version,
-        license=licence,
-        author=author,
-        author_email=author_email,
-        url=url,
-        description=description,
-        long_description=long_description,
-        classifiers=classifiers,
-        install_requires=install_requires,
-        extras_require=extras_require,
-        packages=packages,
-        package_data=package_data,
-        include_package_data=True,
-    )
diff --git a/src/kperm-gray.h b/src/kperm-gray.h
deleted file mode 100644
index 1ada5ab..0000000
--- a/src/kperm-gray.h
+++ /dev/null
@@ -1,151 +0,0 @@
-#if !defined  HAVE_KPERM_GRAY_H__
-#define       HAVE_KPERM_GRAY_H__
-// This file is part of the FXT library.
-// Copyright (C) 2010, 2012, 2013, 2014, 2019, 2023 Joerg Arndt
-// License: GNU General Public License version 3 or later,
-// see the file COPYING.txt in the main directory.
-
-
-#include <cstdlib>
-
-#include "swap.h"
-
-
-class kperm_gray
-// Gray code for k-permutations of n elements.
-// Same as: k-prefixes of permutations of n elements.
-// Same as: arrangements of k out of n elements.
-// CAT algorithm based on mixed radix Gray code
-//   for the factorial number system (falling base).
-{
-protected:
-    std::size_t d_[64];  // mixed radix digits with radix = [n-1, n-2, ..., 2]
-    std::size_t i_[64];  // directions
-    std::size_t ix_[64]; // permutation (inverse perms in Trotter's order)
-    std::size_t x_[64];  // inverse permutation (perms in Trotter's order)
-    std::size_t n_;   // total of n elements
-    std::size_t k_;   // prefix length: permutations of k elements
-    std::size_t sw1_, sw2_;  // indices of elements swapped most recently
-
-    kperm_gray(const kperm_gray&) = delete;
-    kperm_gray & operator = (const kperm_gray&) = delete;
-
-public:
-    explicit kperm_gray(std::size_t n)
-    {
-        n_ = n;
-        k_ = n;
-        // d_ = new std::size_t[n_];
-        d_[n-1] = -1UL;  // sentinel
-        // i_ = new std::size_t[n_];
-        // x_ = new std::size_t[n_];
-        // ix_ = new std::size_t[n_];
-        i_[n_-1] = 0;
-        first(n_);
-    }
-
-    ~kperm_gray()
-    {
-        // delete [] i_;
-        // delete [] d_;
-        // delete [] x_;
-        // delete [] ix_;
-    }
-
-    const std::size_t * data()  const  { return ix_; }
-    const std::size_t * invdata()  const  { return x_; }
-    void get_swap(std::size_t &s1, std::size_t &s2)  const  { s1=sw1_; s2=sw2_; }
-
-    const std::size_t * mr_digits()  const  { return d_; }
-
-
-    void first(std::size_t k)
-    {
-        k_ = k;
-        for (std::size_t j=0; j<n_-1; ++j)  d_[j] = 0;
-        for (std::size_t j=0; j<n_-1; ++j)  i_[j] = +1;
-        for (std::size_t j=0; j<n_; ++j)  x_[j] = ix_[j] = j;
-        sw1_=n_-1;  sw2_=n_-2;
-    }
-
-//    void last(std::size_t k)
-//    {
-//        first(k);  // k_ is set with call
-//        d_[n_-2] = 1;
-//        swap2(x_[n_-1], x_[n_-2]);
-//        swap2(ix_[n_-1], ix_[n_-2]);
-//        for (std::size_t j=0; j<n_-2; ++j)  i_[j] = -1UL;
-//        i_[n_-2] = +1;
-//    }
-
-private:
-    void swap(std::size_t j, std::size_t im)
-    {
-        const std::size_t x1 = j;        // element j
-        const std::size_t i1 = ix_[x1];  // position of j
-        const std::size_t i2 = i1 + im;  // neighbor
-        const std::size_t x2 = x_[i2];   // position of neighbor
-        x_[i1] = x2;   x_[i2] = x1;   // swap2(x_[i1], x_[i2]);
-        ix_[x1] = i2;  ix_[x2] = i1;  // swap2(ix_[x1], ix_[x2]);
-        sw1_=i1;  sw2_=i2;
-    }
-
-public:
-    bool next()
-    {
-        std::size_t j = 0;
-        std::size_t m1 = n_ - 1;  // nine in falling factorial base
-        std::size_t ij;
-        while ( (ij=i_[j]) )
-        {
-            std::size_t im = i_[j];
-            std::size_t dj = d_[j] + im;
-            if ( dj>m1 )  // =^= if ( (dj>m1) || ((long)dj<0) )
-            {
-                i_[j] = -ij;
-            }
-            else
-            {
-                d_[j] = dj;
-                swap(j, im);
-                return true;
-            }
-
-            --m1;
-            ++j;
-            if ( j>=k_ )  return false;
-        }
-        return false;
-    }
-
-//    bool prev()
-//    {
-//        std::size_t j = 0;
-//        std::size_t m1 = n_ - 1;
-//        std::size_t ij;
-//        while ( (ij=i_[j]) )
-//        {
-//            std::size_t im = -i_[j];
-//            std::size_t dj = d_[j] + im;
-//            if ( dj>m1 )  // =^= if ( (dj>m1) || ((long)dj<0) )
-//            {
-//                i_[j] = -ij;
-//            }
-//            else
-//            {
-//                d_[j] = dj;
-//                swap(j, im);
-//                return true;
-//            }
-//
-//            --m1;
-//            ++j;
-//            if ( j>=k_ )  return false;
-//        }
-//        return false;
-//    }
-};
-// -------------------------
-
-
-#endif  // !defined HAVE_KPERM_GRAY_H__
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..48d1c1a
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,416 @@
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+#include <numpy/ndarraytypes.h>
+#include <permanent.h>
+
+#include <complex>
+#include <cstdlib>
+
+#define LITERAL(S) #S
+#define STRINGIZE(S) LITERAL(S)
+
+#define PERMANENT_VERSION STRINGIZE(_PERMANENT_VERSION)
+#define PERMANENT_COMPILER_VERSION STRINGIZE(_PERMANENT_COMPILER_VERSION)
+#define PERMANENT_GIT_BRANCH STRINGIZE(_PERMANENT_GIT_BRANCH)
+#define PERMANENT_GIT_COMMIT_HASH STRINGIZE(_PERMANENT_GIT_COMMIT_HASH)
+
+static const char DOCSTRING_MAIN_MODULE[] = R"""(
+Permanent module.
+
+)""";
+
+static const char DOCSTRING_VERSION_INFO_MODULE[] = R"""(
+version information.
+
+)""";
+
+static const char DOCSTRING_PERMANENT[] = R"""(
+Compute the permanent of a matrix using the best algorithm for the shape of the given matrix.
+
+Parameters
+----------
+matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
+
+Returns
+-------
+permanent : (np.double|np.complex)
+    Permanent of matrix.
+
+)""";
+
+static const char DOCSTRING_COMBINATORIC[] = R"""(
+Compute the permanent of a matrix combinatorically.
+
+.. math::
+
+    \text{per}(A) = \sum_{\sigma \in P(N,M)}{
+        \prod_{i=1}^M{a_{i,{\sigma(i)}}}
+    }
+
+Parameters
+----------
+matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
+
+Returns
+-------
+permanent : (np.double|np.complex)
+    Permanent of matrix.
+
+)""";
+
+static const char DOCSTRING_GLYNN[] = R"""(
+Compute the permanent of a matrix via Glynn's algorithm [Glynn]_.
+
+.. math::
+
+    \text{per}(A) = \frac{1}{2^{N-1}} \cdot \sum_{\delta}{
+        \left(\sum_{k=1}^N{\delta_k}\right)
+        \prod_{j=1}^N{\sum_{i=1}^N{\delta_i a_{i,j}}}
+    }
+
+The original formula has been generalized here to work with
+:math:`M`-by-:math:`N` rectangular permanents with :math:`M \leq N`
+by use of the following identity (shown here for :math:`M \geq N`):
+
+.. math::
+
+    {\text{per}}\left(
+        \begin{matrix}
+            a_{1,1} & \cdots & a_{1,N} \\
+            \vdots & \ddots & \vdots \\
+            a_{M,1} & \cdots & a_{M,N} \\
+        \end{matrix}
+    \right)
+    = \frac{1}{\left(M - N + 1\right)!} \cdot {\text{per}}\left(
+        \begin{matrix}
+            a_{1,1} & \cdots & a_{1,N} & 1_{1,N+1} & \cdots & 1_{1,M} \\
+            \vdots & \ddots & \vdots & \vdots & \ddots & \vdots \\
+            a_{M,1} & \cdots & a_{M,N} & 1_{M,N+1} & \cdots & 1_{M,M} \\
+        \end{matrix}
+    \right)
+
+This can be neatly fit into the original formula by extending the inner sums
+over :math:`\delta` from :math:`\left[1,M\right]` to :math:`\left[1,N\right]`:
+
+.. math::
+
+    \text{per}(A) = \frac{1}{2^{N-1}} \cdot \frac{1}{\left(N - M + 1\right)!}
+    \cdot \sum_{\delta}{
+        \left(\sum_{k=1}^N{\delta_k}\right)
+        \prod_{j=1}^N{\left(
+            \sum_{i=1}^M{\delta_i a_{i,j}} + \sum_{i=M+1}^N{\delta_i}
+        \right)}
+    }
+
+.. [Glynn] Glynn, D. G. (2010). The permanent of a square matrix.
+           *European Journal of Combinatorics*, 31(7), 1887-1891.
+
+Parameters
+----------
+matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
+
+Returns
+-------
+permanent : (np.double|np.complex)
+    Permanent of matrix.
+
+)""";
+
+static const char DOCSTRING_RYSER[] = R"""(
+Compute the permanent of a matrix via Ryser's algorithm [Ryser]_.
+
+.. math::
+
+    \text{per}(A) = \sum_{k=0}^{M-1}{
+        {\left(-1\right)}^k
+        \left(\begin{matrix}N - M + k\\ k\end{matrix}\right)
+        \sum_{\sigma \in P(N,M-k)}{
+            \prod_{i=1}^M{
+                \sum_{j=1}^{M-k}{a_{i,{\sigma(j)}}}
+            }
+        }
+    }
+for
+.. [Ryser] Ryser, H. J. (1963). *Combinatorial Mathematics* (Vol. 14).
+           American Mathematical Soc..
+
+Parameters
+----------
+matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
+
+Returns
+-------
+permanent : (np.double|np.complex)
+    Permanent of matrix.
+
+)""";
+
+#define PERMANENT_DISPATCH_ARRAY_TYPE(FN, MATRIX)                                          \
+  {                                                                                        \
+    int type = PyArray_TYPE(MATRIX);                                                       \
+    if (type == NPY_INT8) {                                                                \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::int8_t *ptr =                                                             \
+          reinterpret_cast<const std::int8_t *>(PyArray_GETPTR2(MATRIX, 0, 0));            \
+      return PyLong_FromSsize_t(FN<std::int8_t, Py_ssize_t>(m, n, ptr));                   \
+    } else if (type == NPY_INT16) {                                                        \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::int16_t *ptr =                                                            \
+          reinterpret_cast<const std::int16_t *>(PyArray_GETPTR2(MATRIX, 0, 0));           \
+      return PyLong_FromSsize_t(FN<std::int16_t, Py_ssize_t>(m, n, ptr));                  \
+    } else if (type == NPY_INT32) {                                                        \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::int32_t *ptr =                                                            \
+          reinterpret_cast<const std::int32_t *>(PyArray_GETPTR2(MATRIX, 0, 0));           \
+      return PyLong_FromSsize_t(FN<std::int32_t, Py_ssize_t>(m, n, ptr));                  \
+    } else if (type == NPY_INT64) {                                                        \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::int64_t *ptr =                                                            \
+          reinterpret_cast<const std::int64_t *>(PyArray_GETPTR2(MATRIX, 0, 0));           \
+      return PyLong_FromSsize_t(FN<std::int64_t, Py_ssize_t>(m, n, ptr));                  \
+    } else if (type == NPY_UINT8) {                                                        \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::uint8_t *ptr =                                                            \
+          reinterpret_cast<const std::uint8_t *>(PyArray_GETPTR2(MATRIX, 0, 0));           \
+      return PyLong_FromSsize_t(FN<std::uint8_t, Py_ssize_t>(m, n, ptr));                  \
+    } else if (type == NPY_UINT16) {                                                       \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::uint16_t *ptr =                                                           \
+          reinterpret_cast<const std::uint16_t *>(PyArray_GETPTR2(MATRIX, 0, 0));          \
+      return PyLong_FromSsize_t(FN<std::uint16_t, Py_ssize_t>(m, n, ptr));                 \
+    } else if (type == NPY_UINT32) {                                                       \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::uint32_t *ptr =                                                           \
+          reinterpret_cast<const std::uint32_t *>(PyArray_GETPTR2(MATRIX, 0, 0));          \
+      return PyLong_FromSsize_t(FN<std::uint32_t, Py_ssize_t>(m, n, ptr));                 \
+    } else if (type == NPY_UINT64) {                                                       \
+      if (std::abs<ptrdiff_t>(static_cast<ptrdiff_t>(m) - n) > 20) {                       \
+        PyErr_SetString(PyExc_ValueError,                                                  \
+                        "Difference between # cols and # rows cannot exceed 20");          \
+        return nullptr;                                                                    \
+      }                                                                                    \
+      const std::uint64_t *ptr =                                                           \
+          reinterpret_cast<const std::uint64_t *>(PyArray_GETPTR2(MATRIX, 0, 0));          \
+      return PyLong_FromSsize_t(FN<std::uint64_t, Py_ssize_t>(m, n, ptr));                 \
+    } else if (type == NPY_FLOAT) {                                                        \
+      const float *ptr = reinterpret_cast<const float *>(PyArray_GETPTR2(MATRIX, 0, 0));   \
+      return PyFloat_FromDouble(FN<float>(m, n, ptr));                                     \
+    } else if (type == NPY_DOUBLE) {                                                       \
+      const double *ptr = reinterpret_cast<const double *>(PyArray_GETPTR2(MATRIX, 0, 0)); \
+      return PyFloat_FromDouble(FN<double>(m, n, ptr));                                    \
+    } else if (type == NPY_COMPLEX64) {                                                    \
+      std::complex<float> *ptr =                                                           \
+          reinterpret_cast<std::complex<float> *>(PyArray_GETPTR2(MATRIX, 0, 0));          \
+      std::complex<double> out = FN<std::complex<float>>(m, n, ptr);                       \
+      return PyComplex_FromDoubles(out.real(), out.imag());                                \
+    } else if (type == NPY_COMPLEX128) {                                                   \
+      std::complex<double> *ptr =                                                          \
+          reinterpret_cast<std::complex<double> *>(PyArray_GETPTR2(MATRIX, 0, 0));         \
+      std::complex<double> out = FN<std::complex<double>>(m, n, ptr);                      \
+      return PyComplex_FromDoubles(out.real(), out.imag());                                \
+    } else {                                                                               \
+      PyErr_SetString(PyExc_TypeError, "Array has unsupported dtype");                     \
+      return nullptr;                                                                      \
+    }                                                                                      \
+  }
+
+static PyObject *py_opt(PyObject *module, PyObject *object)
+{
+  (void)module;
+
+  PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
+      PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
+  if (!matrix) {
+    PyErr_SetString(PyExc_TypeError, "Argument must be a 2-dimensional array");
+    return nullptr;
+  }
+
+  size_t m = PyArray_DIMS(matrix)[0];
+  size_t n = PyArray_DIMS(matrix)[1];
+  if (m > 64 || n > 64) {
+    PyErr_SetString(PyExc_ValueError, "Argument array must have <=64 rows and columns");
+    return nullptr;
+  } else if (m > n) {
+    return py_opt(module, PyArray_Transpose(matrix, nullptr));
+  }
+
+  PERMANENT_DISPATCH_ARRAY_TYPE(permanent::opt, matrix)
+}
+
+static PyObject *py_combinatoric(PyObject *module, PyObject *object)
+{
+  (void)module;
+
+  PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
+      PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
+  if (!matrix) {
+    PyErr_SetString(PyExc_TypeError, "Argument must be a 2-dimensional array");
+    return nullptr;
+  }
+
+  size_t m = PyArray_DIMS(matrix)[0];
+  size_t n = PyArray_DIMS(matrix)[1];
+  if (m > 64 || n > 64) {
+    PyErr_SetString(PyExc_ValueError, "Argument array must have <=64 rows and columns");
+    return nullptr;
+  } else if (m > n) {
+    return py_combinatoric(module, PyArray_Transpose(matrix, nullptr));
+  }
+
+  PERMANENT_DISPATCH_ARRAY_TYPE(permanent::combinatoric, matrix)
+}
+
+static PyObject *py_glynn(PyObject *module, PyObject *object)
+{
+  (void)module;
+
+  PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
+      PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
+  if (!matrix) {
+    PyErr_SetString(PyExc_TypeError, "Argument must be a 2-dimensional array");
+    return nullptr;
+  }
+
+  size_t m = PyArray_DIMS(matrix)[0];
+  size_t n = PyArray_DIMS(matrix)[1];
+  if (m > 64 || n > 64) {
+    PyErr_SetString(PyExc_ValueError, "Argument array must have <=64 rows and columns");
+    return nullptr;
+  } else if (m > n) {
+    return py_glynn(module, PyArray_Transpose(matrix, nullptr));
+  }
+
+  PERMANENT_DISPATCH_ARRAY_TYPE(permanent::glynn, matrix)
+}
+
+static PyObject *py_ryser(PyObject *module, PyObject *object)
+{
+  PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
+      PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
+  if (!matrix) {
+    PyErr_SetString(PyExc_TypeError, "Argument must be a 2-dimensional array");
+    return nullptr;
+  }
+
+  size_t m = PyArray_DIMS(matrix)[0];
+  size_t n = PyArray_DIMS(matrix)[1];
+  if (m > 64 || n > 64) {
+    PyErr_SetString(PyExc_ValueError, "Argument array must have <=64 rows and columns");
+    return nullptr;
+  } else if (m > n) {
+    return py_ryser(module, PyArray_Transpose(matrix, nullptr));
+  }
+
+  PERMANENT_DISPATCH_ARRAY_TYPE(permanent::ryser, matrix)
+}
+
+/* Define the Python methods that will go into the C extension module.       *
+ *                                                                           *
+ * Note:  METH_O indicates that the Python function takes a single argument. *
+ *        On the C side, the function takes two PyObject* arguments;         *
+ *        the first one is the C extension module itself,                    *
+ *        and the second one is the argument to the Python function.         */
+
+static PyMethodDef main_methods[] = {
+    /* Python function name     C function          Args flag   Docstring */
+    {"opt", py_opt, METH_O, DOCSTRING_PERMANENT},
+    {"combinatoric", py_combinatoric, METH_O, DOCSTRING_COMBINATORIC},
+    {"glynn", py_glynn, METH_O, DOCSTRING_GLYNN},
+    {"ryser", py_ryser, METH_O, DOCSTRING_RYSER},
+    {nullptr, nullptr, 0, nullptr} /* sentinel value */
+};
+
+static PyMethodDef version_info_methods[] = {
+    {nullptr, nullptr, 0, nullptr} /* sentinel value */
+};
+
+/* Define the C extension module. */
+
+static struct PyModuleDef main_def = {
+    PyModuleDef_HEAD_INIT,
+    "permanent",
+    DOCSTRING_MAIN_MODULE,
+    -1,
+    main_methods,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+};
+
+static struct PyModuleDef version_info_def = {
+    PyModuleDef_HEAD_INIT,
+    "version_info",
+    DOCSTRING_VERSION_INFO_MODULE,
+    -1,
+    version_info_methods,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+};
+
+/* Initialize the C extension module. */
+
+// cppcheck-suppress unusedFunction
+PyMODINIT_FUNC PyInit_permanent(void)
+{
+  /* Initialize Python API */
+  Py_Initialize();
+
+  /* Initialize NumPy NDArray API */
+  import_array();
+
+  /* Create version info module. */
+  auto *version_info_module = PyModule_Create(&version_info_def);
+  PyModule_AddStringConstant(version_info_module, "version", PERMANENT_VERSION);
+  PyModule_AddStringConstant(version_info_module, "compiler_version", PERMANENT_COMPILER_VERSION);
+  PyModule_AddStringConstant(version_info_module, "git_branch", PERMANENT_GIT_BRANCH);
+  PyModule_AddStringConstant(version_info_module, "git_commit_hash", PERMANENT_GIT_COMMIT_HASH);
+
+  /* Create main module. */
+  auto *main_module = PyModule_Create(&main_def);
+  PyModule_AddStringConstant(main_module, "__version__", PERMANENT_VERSION);
+  PyModule_AddObject(main_module, "version_info", version_info_module);
+  return main_module;
+}
+
+#undef PERMANENT_DISPATCH_ARRAY_TYPE
+#undef PERMANENT_VERSION
+#undef PERMANENT_COMPILER_VERSION
+#undef PERMANENT_GIT_BRANCH
+#undef PERMANENT_GIT_COMMIT_BRANCH
+#undef PERMANENT_BUILD_TIME
+#undef PERMANENT_COMPILER_VERSION
diff --git a/src/perm-mv0.h b/src/perm-mv0.h
deleted file mode 100644
index 762e8e9..0000000
--- a/src/perm-mv0.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#if !defined  HAVE_PERM_MV0_H__
-#define       HAVE_PERM_MV0_H__
-// This file is part of the FXT library.
-// Copyright (C) 2010, 2011, 2012, 2014, 2019, 2023 Joerg Arndt
-// License: GNU General Public License version 3 or later,
-// see the file COPYING.txt in the main directory.
-
-
-#include <cstdlib>
-
-#include "swap.h"
-
-
-// define to update d[0] with each step:
-#define PERM_MV0_UPDATE_D0  // default is on
-
-// whether to use arrays instead of pointers:
-#define PERM_MV0_FIXARRAYS  // small speedup; default off
-
-class perm_mv0
-// Inverse permutations corresponding to falling factorial numbers.
-// CAT algorithm based on mixed radix Gray code
-//   for the factorial number system (falling base).
-{
-public:
-#if !defined PERM_MV0_FIXARRAYS
-    std::size_t *d_;  // mixed radix digits with radix = [n-1, n-2, n-3, ..., 2]
-    std::size_t *x_;  // permutation
-#else
-    std::size_t d_[64];
-    std::size_t x_[64];
-#endif
-    std::size_t ect_;  // counter for easy case
-    std::size_t n_;   // permutations of n elements
-
-    perm_mv0(const perm_mv0&) = delete;
-    perm_mv0 & operator = (const perm_mv0&) = delete;
-
-public:
-    explicit perm_mv0(std::size_t n)
-    // Must have n>=2
-    {
-        n_ = n;
-#if !defined PERM_MV0_FIXARRAYS
-        d_ = new std::size_t[n_];
-        x_ = new std::size_t[n_];
-#endif
-        d_[n-1] = 1;  // sentinel (must be nonzero)
-        first();
-    }
-
-    ~perm_mv0()
-    {
-#if !defined PERM_MV0_FIXARRAYS
-        delete [] d_;
-        delete [] x_;
-#endif
-    }
-
-    const std::size_t * data()  const  { return x_; }
-
-
-    void first()
-    {
-        for (std::size_t k=0; k<n_; ++k)  x_[k] = k;
-        for (std::size_t k=0; k<n_-1; ++k)  d_[k] = 0;
-        ect_ = 0;
-    }
-
-    bool next()
-    {
-        if ( ++ect_ < n_ )  // easy case
-        {
-#if defined PERM_MV0_UPDATE_D0
-            d_[0] = ect_;
-#endif
-            swap2( x_[ect_], x_[ect_-1] );
-            return true;
-        }
-        else
-        {
-            ect_ = 0;
-#if defined PERM_MV0_UPDATE_D0
-            d_[0] = ect_;
-#endif
-
-            std::size_t j = 1;
-            std::size_t m1 = n_ - 2;   // nine in falling factorial base
-            while ( d_[j]==m1 )  // find digit to increment
-            {
-                d_[j] = 0;
-                --m1;
-                ++j;
-            }
-
-            if ( j==n_-1 )  return false;  // current permutation is last
-
-            const std::size_t dj = d_[j];
-            d_[j] = dj + 1;
-
-            // element at d[j] moves one position to the right:
-            swap2( x_[dj], x_[dj+1] );
-
-            {  // move n-j elements to end:
-                std::size_t s = n_-j,  d = n_;
-                do
-                {
-                    --s;
-                    --d;
-                    x_[d] = x_[s];
-                }
-                while ( s );
-            }
-
-            // fill in 0,1,2,..,j-1 at start:
-            for (std::size_t k=0; k<j; ++k)  x_[k] = k;
-
-            return true;
-        }
-    }
-};
-// -------------------------
-
-//#undef PERM_MV0_UPDATE_D0
-//#undef PERM_MV0_FIXARRAYS
-
-
-#endif  // !defined HAVE_PERM_MV0_H__
diff --git a/src/permanent.cc b/src/permanent.cc
deleted file mode 100644
index 83207fc..0000000
--- a/src/permanent.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Copyright 2034 QC-Devs (GPLv3) */
-
-#include "permanent.h"
-
-#include <complex>
-
-template double combinatoric<double>(const std::size_t, const std::size_t,
-                                     const double *);
-
-template double combinatoric_rectangular<double>(const std::size_t,
-                                                 const std::size_t,
-                                                 const double *);
-
-template double glynn<double>(const std::size_t, const std::size_t,
-                              const double *);
-
-template double glynn_rectangular<double>(const std::size_t, const std::size_t,
-                                          const double *);
-
-template double ryser<double>(const std::size_t, const std::size_t,
-                              const double *);
-
-template double ryser_rectangular<double>(const std::size_t, const std::size_t,
-                                          const double *);
-
-template double opt<double>(const std::size_t, const std::size_t,
-                            const double *);
-
-template std::complex<double> combinatoric<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> combinatoric_rectangular<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> glynn<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> glynn_rectangular<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> ryser<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> ryser_rectangular<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
-
-template std::complex<double> opt<std::complex<double>>(
-    const std::size_t, const std::size_t, const std::complex<double> *);
diff --git a/src/permanent.h b/src/permanent.h
deleted file mode 100644
index 8e9247c..0000000
--- a/src/permanent.h
+++ /dev/null
@@ -1,515 +0,0 @@
-/* Copyright 2034 QC-Devs (GPLv3) */
-
-#ifndef PERMANENT_H_
-#define PERMANENT_H_
-
-#include <complex>
-#include <cstdlib>
-
-#include "kperm-gray.h"
-#include "perm-mv0.h"
-#include "tables.h"
-
-#ifdef WITH_TUNING_FILE
-
-/* Include tuning file. */
-
-#include "tuning.h"
-
-#else
-
-/* Set default tuning parameters. */
-
-constexpr double PARAM_1 = -0.572098;
-constexpr double PARAM_2 = -22.014212;
-constexpr double PARAM_3 = 15.29794;
-constexpr double PARAM_4 = 3.0;
-
-#endif  // WITH_TUNING_FILE
-
-/* Allow type promotion to complex. */
-
-template <typename T>
-struct identity_t {
-    typedef T type;
-};
-
-#define COMPLEX_OPS(OP)                                                       \
-                                                                              \
-    template <typename _Tp>                                                   \
-    std::complex<_Tp> operator OP(                                            \
-        std::complex<_Tp> lhs, const typename identity_t<_Tp>::type & rhs) {  \
-        return lhs OP rhs;                                                    \
-    }                                                                         \
-                                                                              \
-    template <typename _Tp>                                                   \
-    std::complex<_Tp> operator OP(const typename identity_t<_Tp>::type & lhs, \
-                                  const std::complex<_Tp> &rhs) {             \
-        return lhs OP rhs;                                                    \
-    }
-
-COMPLEX_OPS(+)
-COMPLEX_OPS(-)
-COMPLEX_OPS(*)
-COMPLEX_OPS(/)
-
-#undef COMPLEX_OPS
-
-template <typename T>
-T combinatoric(const std::size_t m, const std::size_t n, const T *ptr) {
-    (void)n;
-
-    perm_mv0 permutations(m);
-
-    const std::size_t *perm = permutations.data();
-
-    std::size_t i;
-    T out = 0.0;
-
-    do {
-        T prod = 1.0;
-        for (i = 0; i < m; ++i) {
-            prod *= ptr[i * m + perm[i]];
-        }
-        out += prod;
-    } while (permutations.next());
-
-    return out;
-}
-
-template <typename T>
-T combinatoric_rectangular(const std::size_t m, const std::size_t n,
-                           const T *ptr) {
-    kperm_gray permutations(n);
-    permutations.first(m);
-
-    const std::size_t *perm = permutations.data();
-
-    std::size_t i;
-    T out = 0.0;
-
-    do {
-        T prod = 1.0;
-        for (i = 0; i < m; ++i) {
-            prod *= ptr[i * n + perm[i]];
-        }
-        out += prod;
-    } while (permutations.next());
-
-    return out;
-}
-
-template <typename T>
-T glynn(const std::size_t m, const std::size_t n, const T *ptr) {
-    (void)n;
-
-    std::size_t i, j;
-    std::size_t pos = 0;
-    std::size_t bound = m - 1;
-    std::size_t perm[64 + 1];
-
-    int sign = 1;
-    int delta[64];
-
-    T sum;
-    T out = 1.0;
-    T vec[64];
-
-    /* Fill delta array (all +1 to start), and permutation array with [0...m].
-     */
-
-    for (i = 0; i < m; ++i) {
-        perm[i] = i;
-        delta[i] = 1;
-    }
-
-    /* Handle first permutation. */
-
-    for (j = 0; j < m; ++j) {
-        sum = 0.0;
-        for (i = 0; i < m; ++i) {
-            sum += ptr[i * m + j] * delta[i];
-        }
-        vec[j] = sum;
-    }
-    for (j = 0; j < m; ++j) {
-        out *= vec[j];
-    }
-
-    /* Iterate from the second to the final permutation. */
-
-    while (pos != bound) {
-        /* Update sign and delta. */
-
-        sign *= -1;
-        delta[bound - pos] *= -1;
-
-        /* Compute term. */
-
-        for (j = 0; j < m; ++j) {
-            sum = 0.0;
-            for (i = 0; i < m; ++i) {
-                sum += ptr[i * m + j] * delta[i];
-            }
-            vec[j] = sum;
-        }
-
-        /* Multiply by the product of the vectors in delta. */
-
-        T prod = 1.0;
-        for (i = 0; i < m; ++i) {
-            prod *= vec[i];
-        }
-        out += sign * prod;
-
-        /* Go to next permutation. */
-
-        perm[0] = 0;
-        perm[pos] = perm[pos + 1];
-        ++pos;
-        perm[pos] = pos;
-        pos = perm[0];
-    }
-
-    /* Divide by external factor and return permanent. */
-
-    return out / (1UL << bound);
-}
-
-template <typename T>
-T glynn_rectangular(const std::size_t m, const std::size_t n, const T *ptr) {
-    std::size_t i, j, k;
-    std::size_t pos = 0;
-    std::size_t bound = n - 1;
-    std::size_t perm[64 + 1];
-
-    int sign = 1;
-    int delta[64];
-
-    T sum;
-    T out = 1.0;
-    T vec[64];
-
-    /* Fill delta array (all +1 to start), and permutation array with [0...n].
-     */
-
-    for (i = 0; i < n; ++i) {
-        delta[i] = 1;
-        perm[i] = i;
-    }
-
-    /* Handle first permutation. */
-
-    for (j = 0; j < n; ++j) {
-        sum = 0.0;
-        for (i = 0; i < m; ++i) {
-            sum += ptr[i * n + j] * delta[i];
-        }
-        for (k = m; k < n; ++k) {
-            sum += delta[k];
-        }
-        vec[j] = sum;
-    }
-    for (i = 0; i < n; ++i) {
-        out *= vec[i];
-    }
-
-    /* Iterate from the second to the final permutation. */
-
-    while (pos != bound) {
-        /* Update sign and delta. */
-
-        sign *= -1;
-        delta[bound - pos] *= -1;
-
-        /* Compute term. */
-
-        for (j = 0; j < n; ++j) {
-            sum = 0.0;
-            for (i = 0; i < m; ++i) {
-                sum += ptr[i * n + j] * delta[i];
-            }
-
-            for (k = m; k < n; ++k) {
-                sum += delta[k];
-            }
-            vec[j] = sum;
-        }
-
-        /* Multiply by the product of the vectors in delta. */
-
-        T prod = 1.0;
-        for (i = 0; i < n; ++i) {
-            prod *= vec[i];
-        }
-        out += sign * prod;
-
-        /* Go to next permutation. */
-
-        perm[0] = 0;
-        perm[pos] = perm[pos + 1];
-        ++pos;
-        perm[pos] = pos;
-        pos = perm[0];
-    }
-
-    /* Divide by external factor and return permanent. */
-
-    return out / ((1UL << bound) * FACTORIAL(n - m));
-}
-
-template <typename T>
-T ryser(const std::size_t m, const std::size_t n, const T *ptr) {
-    (void)n;
-
-    std::size_t i, j;
-    std::size_t k;
-    T rowsum;
-    T out = 0;
-
-    /* Iterate over c = pow(2, m) submatrices (equal to (1 << m)) submatrices.
-     */
-
-    std::size_t c = 1UL << m;
-
-    /* Loop over columns of submatrix; compute product of row sums. */
-
-    for (k = 0; k < c; ++k) {
-        T rowsumprod = 1.0;
-        for (i = 0; i < m; ++i) {
-            /* Loop over rows of submatrix; compute row sum. */
-
-            rowsum = 0.0;
-            for (j = 0; j < m; ++j) {
-                /* Add element to row sum if the row index is in the
-                 * characteristic * vector of the submatrix, which is the binary
-                 * vector given by k.  */
-
-                if (k & (1UL << j)) {
-                    rowsum += ptr[m * i + j];
-                }
-            }
-
-            /* Update product of row sums. */
-
-            rowsumprod *= rowsum;
-        }
-
-        /* Add term multiplied by the parity of the characteristic vector. */
-
-        out += rowsumprod * (1 - ((__builtin_popcountll(k) & 1) << 1));
-    }
-
-    /* Return answer with the correct sign (times -1 for odd m). */
-
-    return out * ((m % 2 == 1) ? -1 : 1);
-}
-
-// template<typename T>
-// T ryser_rectangular(const std::size_t m, const std::size_t n, const T *ptr)
-// {
-//     kperm_gray permutations(n);
-//
-//     const std::size_t *perm = permutations.data();
-//
-//     std::size_t i, j, k, bin;
-//     int sign = 1;
-//
-//     T colprod, matsum, permsum;
-//     T out = 0.0;
-//     T vec[64];
-//
-//     /* Iterate over subsets from size 0 to size m */
-//
-//     for (k = 0; k < m; ++k) {
-//
-//         permutations.first(m - k);
-//
-//         bin = BINOMIAL((n - m + k), k);
-//         permsum = 0.0;
-//
-//         do {
-//
-//             /* Compute permanents of each submatrix */
-//
-//             for (i = 0; i < m; ++i) {
-//                 matsum = 0.0;
-//                 for (j = 0; j < m - k; ++j)
-//                     matsum += ptr[i * n + perm[j]];
-//                 vec[i] = matsum;
-//             }
-//
-//             colprod = 1.0;
-//             for (i = 0; i < m; ++i) {
-//                 colprod *= vec[i];
-//             }
-//
-//             permsum += colprod;
-//         }
-//         while (permutations.next());
-//
-//         /* Add term to result */
-//
-//         out += permsum * sign * bin;
-//
-//         sign *= -1;
-//     }
-//
-//     return out;
-// }
-
-template <typename T>
-void swap2(T *perm, T i, T j) {
-    const T temp = perm[i];
-    perm[i] = perm[j];
-    perm[j] = temp;
-}
-
-template <typename T>
-void init_perm(const T N, T *const the_fact_set, T *const the_perm_set,
-               T *const the_inv_set) {
-    for (T i = 0; i < N; i++) {
-        the_fact_set[i + 1] = 0;
-        the_perm_set[i] = i;
-        the_inv_set[i] = i;
-    }
-}
-
-template <typename T>
-bool gen_next_perm(T *const falling_fact, T *const perm, T *const inv_perm,
-                   const T cols, const T u_) {
-    /* Use the current permutation to check for next permutation
-     * lexicographically, if it exists update the curr_array by swapping the
-     * leftmost changed element (at position i < k). Replace the elements up to
-     * position k by the smallest element lying to the right of i. Do not put
-     * remaining k, .., n - 1 positions in ascending order to improve efficiency
-     * has time complexity O(n) in the worst case. */
-    T i = u_ - 1;
-    T m1 = cols - i - 1;
-
-    /* Begin update of falling_fact - recall falling_fact[0] = 0, so increment
-     * index accordingly. If i becomes negative during the check, you are
-     * pointing to the sentinel value, so you are done generating
-     * permutations. */
-    while (falling_fact[i + 1] == m1) {
-        falling_fact[i + 1] = 0;
-        ++m1;
-        --i;
-    }
-    if (i == 0UL - 1) {
-        return false;
-    }
-    ++falling_fact[i + 1];
-
-    /* Find smallest element perm[i] < perm[j] that lies to the right of
-     * pos i, and then update the state of the permutation using its inverse
-     * to generate next. */
-    T z = perm[i];
-    do {
-        ++z;
-    } while (inv_perm[z] <= i);
-    const T j = inv_perm[z];
-    swap2(perm, i, j);
-    swap2(inv_perm, perm[i], perm[j]);
-    ++i;
-    T y = 0;
-
-    /* Find the smallest elements to the right of position i. */
-    while (i < u_) {
-        while (inv_perm[y] < i) {
-            ++y;
-        }
-        const T k = inv_perm[y];
-        swap2(perm, i, k);
-        swap2(inv_perm, perm[i], perm[k]);
-        ++i;
-    }
-    return true;
-}
-
-template <typename T>
-T ryser_rectangular(const std::size_t m, const std::size_t n, const T *ptr) {
-    std::size_t falling_fact[128];
-    std::size_t perm[128];
-    std::size_t inv_perm[128];
-    falling_fact[0] = 0;
-
-    std::size_t i, j, k;
-
-    int value_sign = 1;
-
-    T sum_of_matrix_vals;
-    T sum_over_k_vals = 0.0;
-    T vec[64];
-
-    /* Dealing with a rectangle. Can't use bit hacking trick here. */
-    for (k = 0; k < m; ++k) {
-        /* Store the binomial coefficient for this k value bin_c. */
-        std::size_t counter =
-            0;  // Count how many permutations you have generated
-        T bin_c = BINOMIAL((n - m + k), k);
-        T prod_of_cols = 1.0;
-        T result = 0.0;
-
-        /* (Re)initialize the set to permute for this k value. */
-        init_perm(n, falling_fact, perm, inv_perm);
-
-        /* sort up to position u + 1 where u = min(m - k, n - 1). */
-        std::size_t sort_up_to = n - 1;
-
-        if ((m - k) < sort_up_to) {
-            sort_up_to = (m - k);
-        }
-
-        for (i = 0; i < m; ++i) {
-            sum_of_matrix_vals = 0.0;
-            for (j = 0; j < sort_up_to; ++j) {
-                sum_of_matrix_vals += ptr[i * n + perm[j]];
-            }
-            vec[i] = sum_of_matrix_vals;
-        }
-        for (i = 0; i < m; ++i) {
-            prod_of_cols *= vec[i];
-        }
-
-        result += value_sign * bin_c * prod_of_cols;
-        counter += 1;
-
-        /* Iterate over second to last permutations of the set. */
-        while (gen_next_perm(falling_fact, perm, inv_perm, n, sort_up_to)) {
-            for (i = 0; i < m; ++i) {
-                sum_of_matrix_vals = 0.0;
-                for (j = 0; j < m - k; ++j) {
-                    sum_of_matrix_vals += ptr[i * n + perm[j]];
-                }
-                vec[i] = sum_of_matrix_vals;
-            }
-            prod_of_cols = 1.0;
-            for (i = 0; i < m; ++i) {
-                prod_of_cols *= vec[i];
-            }
-
-            result += value_sign * bin_c * prod_of_cols;
-            counter += 1;
-        }
-        sum_over_k_vals += (result / counter) * BINOMIAL(n, (m - k));
-        value_sign *= -1;
-    }
-    return sum_over_k_vals;
-}
-
-template <typename T>
-T opt(const std::size_t m, const std::size_t n, const T *ptr) {
-    /* Use the fastest algorithm. */
-
-    if (m == n && n <= PARAM_4) {
-        return ryser(m, n, ptr);
-    } else if (n * PARAM_1 + PARAM_2 * m / n + PARAM_3 > 0) {
-        return (m == n) ? combinatoric(m, n, ptr)
-                        : combinatoric_rectangular(m, n, ptr);
-    } else {
-        return (m == n) ? glynn(m, n, ptr) : glynn_rectangular(m, n, ptr);
-    }
-}
-
-#endif  // PERMANENT_H_
diff --git a/src/py_permanent.cc b/src/py_permanent.cc
deleted file mode 100644
index eaeb223..0000000
--- a/src/py_permanent.cc
+++ /dev/null
@@ -1,328 +0,0 @@
-/* Copyright 2034 QC-Devs (GPLv3) */
-
-#include <Python.h>
-
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-#include <numpy/arrayobject.h>
-#include <numpy/ndarraytypes.h>
-
-#include <complex>
-#include <cstdint>
-#include <cstdlib>
-
-#include "permanent.h"
-
-static const char DOCSTRING_MODULE[] = R"""(
-Permanent module C extension.
-
-)""";
-
-static const char DOCSTRING_PERMANENT[] = R"""(
-Compute the permanent of a matrix using the best algorithm for the shape of the given matrix.
-
-Parameters
-----------
-matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
-
-Returns
--------
-permanent : (np.double|np.complex)
-    Permanent of matrix.
-
-)""";
-
-static const char DOCSTRING_COMBINATORIC[] = R"""(
-Compute the permanent of a matrix combinatorically.
-
-.. math::
-
-    \text{per}(A) = \sum_{\sigma \in P(N,M)}{
-        \prod_{i=1}^M{a_{i,{\sigma(i)}}}
-    }
-
-Parameters
-----------
-matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
-
-Returns
--------
-permanent : (np.double|np.complex)
-    Permanent of matrix.
-
-)""";
-
-static const char DOCSTRING_GLYNN[] = R"""(
-Compute the permanent of a matrix via Glynn's algorithm [Glynn]_.
-
-.. math::
-
-    \text{per}(A) = \frac{1}{2^{N-1}} \cdot \sum_{\delta}{
-        \left(\sum_{k=1}^N{\delta_k}\right)
-        \prod_{j=1}^N{\sum_{i=1}^N{\delta_i a_{i,j}}}
-    }
-
-The original formula has been generalized here to work with
-:math:`M`-by-:math:`N` rectangular permanents with :math:`M \leq N`
-by use of the following identity (shown here for :math:`M \geq N`):
-
-.. math::
-
-    {\text{per}}\left(
-        \begin{matrix}
-            a_{1,1} & \cdots & a_{1,N} \\
-            \vdots & \ddots & \vdots \\
-            a_{M,1} & \cdots & a_{M,N} \\
-        \end{matrix}
-    \right)
-    = \frac{1}{\left(M - N + 1\right)!} \cdot {\text{per}}\left(
-        \begin{matrix}
-            a_{1,1} & \cdots & a_{1,N} & 1_{1,N+1} & \cdots & 1_{1,M} \\
-            \vdots & \ddots & \vdots & \vdots & \ddots & \vdots \\
-            a_{M,1} & \cdots & a_{M,N} & 1_{M,N+1} & \cdots & 1_{M,M} \\
-        \end{matrix}
-    \right)
-
-This can be neatly fit into the original formula by extending the inner sums
-over :math:`\delta` from :math:`\left[1,M\right]` to :math:`\left[1,N\right]`:
-
-.. math::
-
-    \text{per}(A) = \frac{1}{2^{N-1}} \cdot \frac{1}{\left(N - M + 1\right)!}
-    \cdot \sum_{\delta}{
-        \left(\sum_{k=1}^N{\delta_k}\right)
-        \prod_{j=1}^N{\left(
-            \sum_{i=1}^M{\delta_i a_{i,j}} + \sum_{i=M+1}^N{\delta_i}
-        \right)}
-    }
-
-.. [Glynn] Glynn, D. G. (2010). The permanent of a square matrix.
-           *European Journal of Combinatorics*, 31(7), 1887-1891.
-
-Parameters
-----------
-matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
-
-Returns
--------
-permanent : (np.double|np.complex)
-    Permanent of matrix.
-
-)""";
-
-static const char DOCSTRING_RYSER[] = R"""(
-Compute the permanent of a matrix via Ryser's algorithm [Ryser]_.
-
-.. math::
-
-    \text{per}(A) = \sum_{k=0}^{M-1}{
-        {\left(-1\right)}^k
-        \left(\begin{matrix}N - M + k\\ k\end{matrix}\right)
-        \sum_{\sigma \in P(N,M-k)}{
-            \prod_{i=1}^M{
-                \sum_{j=1}^{M-k}{a_{i,{\sigma(j)}}}
-            }
-        }
-    }
-
-.. [Ryser] Ryser, H. J. (1963). *Combinatorial Mathematics* (Vol. 14).
-           American Mathematical Soc..
-
-Parameters
-----------
-matrix : np.ndarray(M, N, dtype=(np.double|np.complex))
-
-Returns
--------
-permanent : (np.double|np.complex)
-    Permanent of matrix.
-
-)""";
-
-static PyObject *py_opt(PyObject *module, PyObject *object) {
-    (void)module;
-
-    PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
-        PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
-    if (!matrix) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Argument must be a 2-dimensional array");
-        return nullptr;
-    }
-
-    std::size_t m = PyArray_DIMS(matrix)[0];
-    std::size_t n = PyArray_DIMS(matrix)[1];
-    if (m > n) {
-        return py_opt(module, PyArray_Transpose(matrix, nullptr));
-    } else if (m > 64 || n > 64) {
-        PyErr_SetString(PyExc_ValueError,
-                        "Argument array must have <=64 rows and columns");
-        return nullptr;
-    }
-
-    int type = PyArray_TYPE(matrix);
-    if (type == NPY_DOUBLE) {
-        const double *ptr =
-            reinterpret_cast<const double *>(PyArray_GETPTR2(matrix, 0, 0));
-        return PyFloat_FromDouble(opt<double>(m, n, ptr));
-    } else if (type == NPY_COMPLEX128) {
-        std::complex<double> *ptr = reinterpret_cast<std::complex<double> *>(
-            PyArray_GETPTR2(matrix, 0, 0));
-        std::complex<double> out = opt<std::complex<double>>(m, n, ptr);
-        return PyComplex_FromDoubles(out.real(), out.imag());
-    } else {
-        PyErr_SetString(PyExc_TypeError,
-                        "Array must have dtype (double|complex)");
-        return nullptr;
-    }
-}
-
-static PyObject *py_combinatoric(PyObject *module, PyObject *object) {
-    (void)module;
-
-    PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
-        PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
-    if (!matrix) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Argument must be a 2-dimensional array");
-        return nullptr;
-    }
-
-    std::size_t m = PyArray_DIMS(matrix)[0];
-    std::size_t n = PyArray_DIMS(matrix)[1];
-    if (m > n) {
-        return py_combinatoric(module, PyArray_Transpose(matrix, nullptr));
-    }
-
-    int type = PyArray_TYPE(matrix);
-    if (type == NPY_DOUBLE) {
-        double *ptr = reinterpret_cast<double *>(PyArray_GETPTR2(matrix, 0, 0));
-        return PyFloat_FromDouble(
-            (m == n) ? combinatoric<double>(m, n, ptr)
-                     : combinatoric_rectangular<double>(m, n, ptr));
-    } else if (type == NPY_COMPLEX128) {
-        std::complex<double> *ptr = reinterpret_cast<std::complex<double> *>(
-            PyArray_GETPTR2(matrix, 0, 0));
-        std::complex<double> out =
-            (m == n)
-                ? combinatoric<std::complex<double>>(m, n, ptr)
-                : combinatoric_rectangular<std::complex<double>>(m, n, ptr);
-        return PyComplex_FromDoubles(out.real(), out.imag());
-    } else {
-        PyErr_SetString(PyExc_TypeError,
-                        "Array must have dtype (double|complex)");
-        return nullptr;
-    }
-}
-
-static PyObject *py_glynn(PyObject *module, PyObject *object) {
-    (void)module;
-
-    PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
-        PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
-    if (!matrix) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Argument must be a 2-dimensional array");
-        return nullptr;
-    }
-
-    std::size_t m = PyArray_DIMS(matrix)[0];
-    std::size_t n = PyArray_DIMS(matrix)[1];
-    if (m > n) {
-        return py_glynn(module, PyArray_Transpose(matrix, nullptr));
-    }
-
-    int type = PyArray_TYPE(matrix);
-    if (type == NPY_DOUBLE) {
-        double *ptr = reinterpret_cast<double *>(PyArray_GETPTR2(matrix, 0, 0));
-        return PyFloat_FromDouble((m == n)
-                                      ? glynn<double>(m, n, ptr)
-                                      : glynn_rectangular<double>(m, n, ptr));
-    } else if (type == NPY_COMPLEX128) {
-        std::complex<double> *ptr = reinterpret_cast<std::complex<double> *>(
-            PyArray_GETPTR2(matrix, 0, 0));
-        std::complex<double> out =
-            (m == n) ? glynn<std::complex<double>>(m, n, ptr)
-                     : glynn_rectangular<std::complex<double>>(m, n, ptr);
-        return PyComplex_FromDoubles(out.real(), out.imag());
-    } else {
-        PyErr_SetString(PyExc_TypeError,
-                        "Array must have dtype (double|complex)");
-        return nullptr;
-    }
-}
-
-static PyObject *py_ryser(PyObject *module, PyObject *object) {
-    (void)module;
-
-    PyArrayObject *matrix = reinterpret_cast<PyArrayObject *>(
-        PyArray_FromAny(object, nullptr, 2, 2, NPY_ARRAY_CARRAY_RO, nullptr));
-    if (!matrix) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Argument must be a 2-dimensional array");
-        return nullptr;
-    }
-
-    std::size_t m = PyArray_DIMS(matrix)[0];
-    std::size_t n = PyArray_DIMS(matrix)[1];
-    if (m > n) {
-        return py_ryser(module, PyArray_Transpose(matrix, nullptr));
-    }
-
-    int type = PyArray_TYPE(matrix);
-    if (type == NPY_DOUBLE) {
-        double *ptr = reinterpret_cast<double *>(PyArray_GETPTR2(matrix, 0, 0));
-        return PyFloat_FromDouble((m == n)
-                                      ? ryser<double>(m, n, ptr)
-                                      : ryser_rectangular<double>(m, n, ptr));
-    } else if (type == NPY_COMPLEX128) {
-        std::complex<double> *ptr = reinterpret_cast<std::complex<double> *>(
-            PyArray_GETPTR2(matrix, 0, 0));
-        std::complex<double> out =
-            (m == n) ? ryser<std::complex<double>>(m, n, ptr)
-                     : ryser_rectangular<std::complex<double>>(m, n, ptr);
-        return PyComplex_FromDoubles(out.real(), out.imag());
-    } else {
-        PyErr_SetString(PyExc_TypeError,
-                        "Array must have dtype (double|complex)");
-        return nullptr;
-    }
-}
-
-/* Define the Python methods that will go into the C extension module.       *
- *                                                                           *
- * Note:  METH_O indicates that the Python function takes a single argument. *
- *        On the C side, the function takes two PyObject* arguments;         *
- *        the first one is the C extension module itself,                    *
- *        and the second one is the argument to the Python function.         */
-
-static PyMethodDef methods[] = {
-    /* Python function name     C function          Args flag   Docstring */
-    {"opt", py_opt, METH_O, DOCSTRING_PERMANENT},
-    {"combinatoric", py_combinatoric, METH_O, DOCSTRING_COMBINATORIC},
-    {"glynn", py_glynn, METH_O, DOCSTRING_GLYNN},
-    {"ryser", py_ryser, METH_O, DOCSTRING_RYSER},
-    {nullptr, nullptr, 0, nullptr} /* sentinel value */
-};
-
-/* Define the C extension module. */
-
-static struct PyModuleDef definition = {
-    PyModuleDef_HEAD_INIT,
-    "permanent",
-    DOCSTRING_MODULE,
-    -1,
-    methods,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-};
-
-/* Initialize the C extension module. */
-
-// cppcheck-suppress unusedFunction
-PyMODINIT_FUNC PyInit_permanent(void) {
-    Py_Initialize();                     /* Initialize Python API */
-    import_array();                      /* Initialize NumPy NDArray API */
-    return PyModule_Create(&definition); /* Create module. */
-}
diff --git a/src/tuning.cc b/src/tuning.cc
index 6c934d1..b7d6f64 100644
--- a/src/tuning.cc
+++ b/src/tuning.cc
@@ -1,4 +1,17 @@
-/* Copyright 2034 QC-Devs (GPLv3) */
+/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if defined(__GNUC__) || defined(__clang__)
+#define _PERMANENT_ALWAYS_INLINE __attribute__((always_inline))
+#elif defined(_MSC_VER) && !defined(__clang__)
+#define _PERMANENT_ALWAYS_INLINE __forceinline
+#define __func__ __FUNCTION__
+#else
+#define _PERMANENT_ALWAYS_INLINE
+#endif
+
+#define _PERMANENT_DEFAULT_TUNING
+
+#include <permanent.h>
 
 #include <cmath>
 #include <cstdlib>
@@ -6,282 +19,215 @@
 #include <fstream>
 #include <iomanip>
 #include <iostream>
-#include <string>
-
-#include "permanent.h"
-
-#ifdef WITH_TUNING_FILE
-
-#include "tuning.h"
 
-#endif  // WITH_TUNING_FILE
-
-#ifdef RUN_TUNING
+namespace permanent {
 
 constexpr char CSV_FILE[] = "src/tuning.csv";
 
 constexpr char CSV_HEADER[] = "M/N,N,Combn,Glynn,Ryser,Fastest";
 
-constexpr std::size_t NUM_TRIALS = 5;
+constexpr size_t NUM_TRIALS = 5;
 
-constexpr std::size_t MAX_ROWS = 16;
+constexpr size_t MAX_ROWS = 16;
 
-constexpr std::size_t MAX_COLS = 16;
+constexpr size_t MAX_COLS = 16;
 
-constexpr std::size_t DATA_POINTS = MAX_ROWS * MAX_COLS;
+constexpr size_t DATA_POINTS = MAX_ROWS * MAX_COLS;
 
 constexpr double TOLERANCE = 0.0001;
 
-#else
-
-constexpr char HEADER_FILE[] = "src/tuning.h";
-
-constexpr double DEFAULT_PARAM_1 = -0.572098;
-
-constexpr double DEFAULT_PARAM_2 = -22.014212;
-
-constexpr double DEFAULT_PARAM_3 = 15.297940;
-
-constexpr double DEFAULT_PARAM_4 = 3.0;
-
-#endif  // RUN_TUNING
-
-#ifdef RUN_TUNING
-
-int main(int argc, char *argv[]) {
-    (void)argc;
-    (void)argv;
-
-    /* Open CSV file */
-
-    std::ofstream csv_file;
-
-    csv_file.open(CSV_FILE);
-
-    if (csv_file.fail()) {
-        std::cerr << "Cannot open CSV file" << std::endl;
-        return 2;
-    }
-
-    /* Set format options for printing */
-
-    std::cout.precision(9);
-    std::cerr.precision(9);
-    csv_file.precision(9);
-
-    std::cout.fill(' ');
-    std::cerr.fill(' ');
-    csv_file.fill(' ');
-
-    /* Print CSV headers */
-
-    std::cout << CSV_HEADER << std::endl;
+namespace {
 
-    csv_file << CSV_HEADER << '\n';
+template <typename T>
+inline _PERMANENT_ALWAYS_INLINE void ensure(T &value)
+{
+  asm volatile("" : "+r,m"(value) : : "memory");
+}
 
-    if (csv_file.fail()) {
-        std::cerr << "Error writing to CSV file" << std::endl;
+template <typename T, typename I = void>
+int generate_tuning_data(std::ofstream &csv_file, const size_t m, const size_t n, const T *array,
+                         double *time_combn, double *time_glynn, double *time_ryser)
+{
+  // Solve the permanent using each algorithm NUM_TRIALS number of times
+  result_t<T, I> soln_combn, soln_glynn, soln_ryser;
+  std::clock_t begin, end;
+  if (m == n) {
+    for (size_t i = 0; i != NUM_TRIALS; ++i) {
+      begin = std::clock();
+      soln_combn = permanent::combinatoric_square<T, I>(m, n, array);
+      ensure(soln_combn);
+      end = std::clock();
+      time_combn[i] = static_cast<double>(end - begin);
+
+      begin = std::clock();
+      soln_glynn = permanent::glynn_square<T, I>(m, n, array);
+      ensure(soln_glynn);
+      end = std::clock();
+      time_glynn[i] = static_cast<double>(end - begin);
+
+      begin = std::clock();
+      soln_ryser = permanent::ryser_square<T, I>(m, n, array);
+      ensure(soln_ryser);
+      end = std::clock();
+      time_ryser[i] = static_cast<double>(end - begin);
+
+      if ((std::fabs(soln_combn - soln_glynn) /
+               std::min(std::fabs(soln_combn), std::fabs(soln_glynn)) >
+           TOLERANCE) ||
+          (std::fabs(soln_ryser - soln_combn) /
+               std::min(std::fabs(soln_ryser), std::fabs(soln_combn)) >
+           TOLERANCE) ||
+          (std::fabs(soln_glynn - soln_ryser) /
+               std::min(std::fabs(soln_glynn), std::fabs(soln_ryser)) >
+           TOLERANCE)) {
+        std::cerr << "Bad permanent values:"
+                  << "\nCombn: " << soln_combn << "\nGlynn: " << soln_glynn
+                  << "\nRyser: " << soln_ryser << std::endl;
         csv_file.close();
-        return 2;
+        return 3;
+      }
     }
-
-    /* Time efficiency of algorithms for different size matrices */
-
-    std::size_t i, m, n;
-
-    std::clock_t begin, end;
-
-    double soln_combn, soln_glynn, soln_ryser;
-    double mean_combn, mean_glynn, mean_ryser;
-
-    double time_combn[128];
-    double time_glynn[128];
-    double time_ryser[128];
-
-    double array[MAX_ROWS * MAX_COLS];
-
-    int fastest;
-
-    /* Random binary matrix for testing. */
-
-    std::srand(1234789789U);
-    for (i = 0; i < MAX_ROWS * MAX_COLS; ++i) {
-        array[i] = (std::rand() / static_cast<double>(RAND_MAX) - 0.5) * 2;
-    }
-
-    /* Iterate over number of rows and number of columns. */
-
-    for (m = 2; m <= MAX_ROWS; ++m) {
-        for (n = m; n <= MAX_COLS; ++n) {
-            /* Solve the permanent using each algorithm NUM_TRIALS number of
-             * times. */
-
-            if (m == n) {
-                for (i = 0; i != NUM_TRIALS; ++i) {
-                    begin = std::clock();
-                    soln_combn = combinatoric<double>(m, n, array);
-                    end = std::clock();
-                    time_combn[i] = static_cast<double>(end - begin);
-
-                    begin = std::clock();
-                    soln_glynn = glynn<double>(m, n, array);
-                    end = std::clock();
-                    time_glynn[i] = static_cast<double>(end - begin);
-
-                    begin = std::clock();
-                    soln_ryser = ryser<double>(m, n, array);
-                    end = std::clock();
-                    time_ryser[i] = static_cast<double>(end - begin);
-
-                    if ((std::fabs(soln_combn - soln_glynn) / soln_combn >
-                         TOLERANCE) ||
-                        (std::fabs(soln_combn - soln_ryser) / soln_ryser >
-                         TOLERANCE)) {
-                        std::cerr << "Bad permanent values:"
-                                  << "\nCombn: " << soln_combn
-                                  << "\nGlynn: " << soln_glynn
-                                  << "\nRyser: " << soln_ryser << std::endl;
-                        csv_file.close();
-                        return 1;
-                    }
-                }
-            } else {
-                for (i = 0; i != NUM_TRIALS; ++i) {
-                    begin = std::clock();
-                    soln_combn = combinatoric_rectangular<double>(m, n, array);
-                    end = std::clock();
-                    time_combn[i] = static_cast<double>(end - begin);
-
-                    begin = std::clock();
-                    soln_glynn = glynn_rectangular<double>(m, n, array);
-                    end = std::clock();
-                    time_glynn[i] = static_cast<double>(end - begin);
-
-                    begin = std::clock();
-                    soln_ryser = ryser_rectangular<double>(m, n, array);
-                    end = std::clock();
-                    time_ryser[i] = static_cast<double>(end - begin);
-
-                    if ((std::fabs(soln_combn - soln_glynn) / soln_combn >
-                         TOLERANCE) ||
-                        (std::fabs(soln_combn - soln_ryser) / soln_ryser >
-                         TOLERANCE)) {
-                        std::cerr << std::scientific << "Bad permanent values:"
-                                  << "\nCombn: " << soln_combn
-                                  << "\nGlynn: " << soln_glynn
-                                  << "\nRyser: " << soln_ryser << std::endl;
-                        csv_file.close();
-                        return 1;
-                    }
-                }
-            }
-
-            /* Calculate the mean for the runtime of each algorithm. */
-
-            mean_combn = 0.0;
-            mean_glynn = 0.0;
-            mean_ryser = 0.0;
-
-            for (i = 0; i != NUM_TRIALS; ++i) {
-                mean_combn += time_combn[i];
-                mean_glynn += time_glynn[i];
-                mean_ryser += time_ryser[i];
-            }
-
-            mean_combn = mean_combn / NUM_TRIALS;
-            mean_glynn = mean_glynn / NUM_TRIALS;
-            mean_ryser = mean_ryser / NUM_TRIALS;
-
-            /* Find the fastest algorithm */
-
-            if (mean_ryser <= mean_glynn) {
-                fastest = 0;
-            } else if (mean_combn <= mean_glynn) {
-                fastest = 1;
-            } else {
-                fastest = 2;
-            }
-
-            /* Write line */
-
-            std::cout << std::scientific << static_cast<double>(m) / n << ','
-                      << std::setw(2) << n << ',' << std::scientific
-                      << mean_combn << ',' << mean_glynn << ','
-                      << std::scientific << mean_ryser << ',' << fastest
-                      << std::endl;
-
-            csv_file << std::scientific << static_cast<double>(m) / n << ','
-                     << std::setw(2) << n << ',' << std::scientific
-                     << mean_combn << ',' << mean_glynn << ','
-                     << std::scientific << mean_ryser << ',' << fastest << '\n';
-
-            if (csv_file.fail()) {
-                std::cerr << "Error writing to CSV file" << std::endl;
-                csv_file.close();
-                return 2;
-            }
-        }
+  } else {
+    for (size_t i = 0; i != NUM_TRIALS; ++i) {
+      begin = std::clock();
+      soln_combn = permanent::combinatoric_rectangular<double>(m, n, array);
+      ensure(soln_combn);
+      end = std::clock();
+      time_combn[i] = static_cast<double>(end - begin);
+
+      begin = std::clock();
+      soln_glynn = permanent::glynn_rectangular<double>(m, n, array);
+      ensure(soln_glynn);
+      end = std::clock();
+      time_glynn[i] = static_cast<double>(end - begin);
+
+      begin = std::clock();
+      soln_ryser = permanent::ryser_rectangular<double>(m, n, array);
+      ensure(soln_ryser);
+      end = std::clock();
+      time_ryser[i] = static_cast<double>(end - begin);
+
+      if ((std::fabs(soln_combn - soln_glynn) /
+               std::min(std::fabs(soln_combn), std::fabs(soln_glynn)) >
+           TOLERANCE) ||
+          (std::fabs(soln_ryser - soln_combn) /
+               std::min(std::fabs(soln_ryser), std::fabs(soln_combn)) >
+           TOLERANCE) ||
+          (std::fabs(soln_glynn - soln_ryser) /
+               std::min(std::fabs(soln_glynn), std::fabs(soln_ryser)) >
+           TOLERANCE)) {
+        std::cerr << std::scientific << "Bad permanent values:"
+                  << "\nCombn: " << soln_combn << "\nGlynn: " << soln_glynn
+                  << "\nRyser: " << soln_ryser << std::endl;
+        csv_file.close();
+        return 3;
+      }
     }
-
-    /* Close CSV file */
-
+  }
+
+  // Calculate the mean for the runtime of each algorithm
+  double mean_combn = 0;
+  double mean_glynn = 0;
+  double mean_ryser = 0;
+  for (size_t i = 0; i != NUM_TRIALS; ++i) {
+    mean_combn += time_combn[i];
+    mean_glynn += time_glynn[i];
+    mean_ryser += time_ryser[i];
+  }
+  mean_combn /= NUM_TRIALS;
+  mean_glynn /= NUM_TRIALS;
+  mean_ryser /= NUM_TRIALS;
+
+  int fastest;
+  if (mean_ryser <= mean_glynn) {
+    fastest = 0;
+  } else if (mean_combn <= mean_glynn) {
+    fastest = 1;
+  } else {
+    fastest = 2;
+  }
+
+  // Write line
+  std::cout << std::scientific << static_cast<double>(m) / n << ',' << std::setw(2) << n << ','
+            << std::scientific << mean_combn << ',' << mean_glynn << ',' << std::scientific
+            << mean_ryser << ',' << fastest << std::endl;
+
+  csv_file << std::scientific << static_cast<double>(m) / n << ',' << std::setw(2) << n << ','
+           << std::scientific << mean_combn << ',' << mean_glynn << ',' << std::scientific
+           << mean_ryser << ',' << fastest << '\n';
+
+  if (csv_file.fail()) {
+    std::cerr << "Error writing to CSV file" << std::endl;
     csv_file.close();
-
-    /* Exit successfully */
-
-    return 0;
+    return 2;
+  }
+  return 0;
 }
 
-#else
-
-int main(int argc, char *argv[]) {
-    (void)argc;
-    (void)argv;
-
-    /* Open header file */
-
-    std::ofstream header_file;
-
-    header_file.open(HEADER_FILE);
-
-    if (header_file.fail()) {
-        std::cerr << "Cannot open header file" << std::endl;
-        return 2;
+template <typename T, typename I = void>
+int run_tuning(const char *filename)
+{
+  // Open CSV file
+  std::ofstream csv_file = std::ofstream(filename);
+  // csv_file.open(filename);
+  if (csv_file.fail()) {
+    std::cerr << "Cannot open CSV file `" << filename << '`' << std::endl;
+    return 2;
+  }
+
+  // Print CSV headers
+  csv_file << CSV_HEADER << '\n';
+  if (csv_file.fail()) {
+    std::cerr << "Error writing to CSV file `" << filename << '`' << std::endl;
+    csv_file.close();
+    return 2;
+  }
+
+  // Set format options for printing
+  csv_file.precision(9);
+  csv_file.fill(' ');
+
+  // Random binary matrix for testing
+  double array[MAX_ROWS * MAX_COLS];
+  std::srand(static_cast<unsigned int>(0x23a23cf5033c3c81UL));
+  for (size_t i = 0; i < MAX_ROWS * MAX_COLS; ++i) {
+    array[i] = (std::rand() / static_cast<double>(RAND_MAX) - 0.5) * 2;
+  }
+
+  double time_combn[128];
+  double time_glynn[128];
+  double time_ryser[128];
+
+  // Iterate over number of rows and number of columns
+  for (size_t m = 2; m <= MAX_ROWS; ++m) {
+    for (size_t n = m; n <= MAX_COLS; ++n) {
+      int result =
+          generate_tuning_data<T, I>(csv_file, m, n, array, time_combn, time_glynn, time_ryser);
+      if (result != 0) {
+        return result;
+      }
     }
+  }
 
-    /* Set format options for printing */
-
-    header_file.precision(9);
-
-    /* Write default header file */
-
-    header_file << "#ifndef TUNING_H_\n";
-    header_file << "#define TUNING_H_\n";
-    header_file << "\n\n";
-    header_file << "constexpr double PARAM_1 = " << std::scientific
-                << DEFAULT_PARAM_1 << ";\n";
-    header_file << "constexpr double PARAM_2 = " << std::scientific
-                << DEFAULT_PARAM_2 << ";\n";
-    header_file << "constexpr double PARAM_3 = " << std::scientific
-                << DEFAULT_PARAM_3 << ";\n";
-    header_file << "constexpr double PARAM_4 = " << std::scientific
-                << DEFAULT_PARAM_4 << ";\n";
-    header_file << "\n\n";
-    header_file << "#endif  // TUNING_H_\n";
-
-    if (header_file.fail()) {
-        std::cerr << "Error writing to header file" << std::endl;
-        header_file.close();
-        return 2;
-    }
+  // Close CSV file
+  csv_file.close();
 
-    /* Close header file */
+  // Exit successfully
+  return 0;
+}
 
-    header_file.close();
+}  // namespace
 
-    /* Exit successfully */
+}  // namespace permanent
 
-    return 0;
+int main(int argc, const char **argv)
+{
+  if (argc != 2) {
+    std::cerr << "Usage: " << argv[0] << " CSV_FILE" << std::endl;
+    return 1;
+  }
+  return permanent::run_tuning<double>(argv[1]);
 }
 
-#endif  // RUN_TUNING
+#undef _PERMANENT_ALWAYS_INLINE
+#undef _PERMANENT_DEFAULT_TUNING
diff --git a/tests/test_permanent.py b/tests/test_permanent.py
new file mode 100644
index 0000000..ee53532
--- /dev/null
+++ b/tests/test_permanent.py
@@ -0,0 +1,317 @@
+from math import factorial
+
+import numpy as np
+import numpy.testing as npt
+
+import pytest
+
+import permanent
+
+
+ATOL = 0e0
+
+
+RTOL = 1e-4
+
+
+FNS = [
+    permanent.opt,
+    permanent.combinatoric,
+    permanent.glynn,
+    permanent.ryser,
+]
+
+
+@pytest.mark.parametrize(
+    "args",
+    [
+        (np.arange(1, 5, dtype=float).reshape(2, 2), 10),
+        (np.arange(1, 5, dtype=int).reshape(2, 2), 10),
+        (np.arange(1, 5, dtype=complex).reshape(2, 2), 10),
+    ],
+)
+@pytest.mark.parametrize("fn", FNS)
+def test_compute_permanent(fn, args):
+    r"""Ensure that the correct permanent is computed."""
+    npt.assert_allclose(fn(args[0]), args[1], atol=ATOL, rtol=RTOL)
+
+
+@pytest.mark.parametrize(
+    "arg",
+    [
+        np.ones((2, 65)),
+        np.ones((65, 2)),
+        np.ones((65, 65)),
+    ],
+)
+@pytest.mark.parametrize("fn", FNS)
+def test_dim_gt_64_raises(fn, arg):
+    r"""Ensure that matrices with any dimension > 64 raise a ValueError."""
+    with npt.assert_raises(ValueError):
+        fn(arg)
+
+
+@pytest.mark.parametrize(
+    "arg",
+    [
+        np.ones((2, 23), dtype=int),
+        np.ones((23, 2), dtype=int),
+        np.ones((43, 64), dtype=int),
+        np.ones((64, 43), dtype=int),
+    ],
+)
+@pytest.mark.parametrize("fn", FNS)
+def test_int_dim_diff_gt_20_raises(fn, arg):
+    r"""Ensure that integer matrices with difference in dimensions > 20 raise a ValueError."""
+    with npt.assert_raises(ValueError):
+        fn(arg)
+
+
+@pytest.mark.parametrize(
+    "arg",
+    [
+        np.ones((4, 4), dtype=bool),
+        np.array([[object() for _ in range(4)] for _ in range(4)]),
+        np.array([[{"hi": 1} for _ in range(4)] for _ in range(4)]),
+        np.array([[chr(x) for x in range(65, 69)] for _ in range(4)]),
+    ],
+)
+@pytest.mark.parametrize("fn", FNS)
+def test_invalid_type_raises(fn, arg):
+    r"""Ensure that matrices with an invalid dtype raise a ValueError."""
+    with npt.assert_raises(TypeError):
+        fn(arg)
+
+
+# # Test square matrices
+# def test_2by2_comb():
+#     matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
+#     assert abs((permanent.combinatoric(matrix) - 10) / 10) <= REL_ERROR
+#     npt.assert_allclose(
+#
+#
+# def test_2by2_glynn():
+#     matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
+#     assert abs((permanent.glynn(matrix) - 10) / 10) <= REL_ERROR
+#
+#
+# def test_2by2_ryser():
+#     matrix = np.arange(1, 5, dtype=np.double).reshape(2, 2)
+#     assert abs((permanent.ryser(matrix) - 10) / 10) <= REL_ERROR
+#
+#
+# def test_3by3_comb():
+#     matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
+#     assert abs((permanent.combinatoric(matrix) - 450) / 450) <= REL_ERROR
+#
+#
+# def test_3by3_glynn():
+#     matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
+#     assert abs((permanent.glynn(matrix) - 450) / 450) <= REL_ERROR
+#
+#
+# def test_3by3_ryser():
+#     matrix = np.arange(1, 10, dtype=np.double).reshape(3, 3)
+#     assert abs((permanent.ryser(matrix) - 450) / 450) <= REL_ERROR
+#
+#
+# def test_4by4_comb():
+#     matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
+#     assert abs((permanent.combinatoric(matrix) - 55456) / 55456) <= REL_ERROR
+#
+#
+# def test_4by4_glynn():
+#     matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
+#     assert abs((permanent.glynn(matrix) - 55456) / 55456) <= REL_ERROR
+#
+#
+# def test_4by4_ryser():
+#     matrix = np.arange(1, 17, dtype=np.double).reshape(4, 4)
+#     assert abs((permanent.ryser(matrix) - 55456) / 55456) <= REL_ERROR
+#
+#
+# def test_7by7_comb():
+#     matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
+#     assert (
+#         abs((permanent.combinatoric(matrix) - 5373548250000) / 5373548250000)
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_7by7_glynn():
+#     matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
+#     assert abs((permanent.glynn(matrix) - 5373548250000) / 5373548250000) <= REL_ERROR
+#
+#
+# def test_7by7_ryser():
+#     matrix = np.arange(1, 50, dtype=np.double).reshape(7, 7)
+#     assert abs((permanent.ryser(matrix) - 5373548250000) / 5373548250000) <= REL_ERROR
+#
+#
+# ## Test rectangular matrices
+# def test_2by3_comb():
+#     matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
+#     assert abs((permanent.combinatoric(matrix) - 58) / 58) <= REL_ERROR
+#
+#
+# def test_2by3_glynn():
+#     matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
+#     assert abs((permanent.glynn(matrix) - 58) / 58) <= REL_ERROR
+#
+#
+# def test_2by3_ryser():
+#     matrix = np.arange(1, 7, dtype=np.double).reshape(2, 3)
+#     assert abs((permanent.ryser(matrix) - 58) / 58) <= REL_ERROR
+#
+#
+# def test_2by4_comb():
+#     matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
+#     assert abs((permanent.combinatoric(matrix) - 190) / 190) <= REL_ERROR
+#
+#
+# def test_2by4_glynn():
+#     matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
+#     assert abs((permanent.glynn(matrix) - 190) / 190) <= REL_ERROR
+#
+#
+# def test_2by4_ryser():
+#     matrix = np.arange(1, 9, dtype=np.double).reshape(2, 4)
+#     assert abs((permanent.ryser(matrix) - 190) / 190) <= REL_ERROR
+#
+#
+# def test_2by7_comb():
+#     matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
+#     assert abs((permanent.combinatoric(matrix) - 1820) / 1820) <= REL_ERROR
+#
+#
+# def test_2by7_glynn():
+#     matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
+#     assert abs((permanent.glynn(matrix) - 1820) / 1820) <= REL_ERROR
+#
+#
+# def test_2by7_ryser():
+#     matrix = np.arange(1, 15, dtype=np.double).reshape(2, 7)
+#     assert abs((permanent.ryser(matrix) - 1820) / 1820) <= REL_ERROR
+#
+#
+# def test_5by7_comb():
+#     matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
+#     assert abs((permanent.combinatoric(matrix) - 1521238320) / 1521238320) <= REL_ERROR
+#
+#
+# def test_5by7_glynn():
+#     matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
+#     assert abs((permanent.glynn(matrix) - 1521238320) / 1521238320) <= REL_ERROR
+#
+#
+# def test_5by7_ryser():
+#     matrix = np.arange(1, 36, dtype=np.double).reshape(5, 7)
+#     assert abs((permanent.ryser(matrix) - 1521238320) / 1521238320) <= REL_ERROR
+#
+#
+# def test_6by7_comb():
+#     matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
+#     assert (
+#         abs((permanent.combinatoric(matrix) - 117681979920) / 117681979920) <= REL_ERROR
+#     )
+#
+#
+# def test_6by7_glynn():
+#     matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
+#     assert abs((permanent.glynn(matrix) - 117681979920) / 117681979920) <= REL_ERROR
+#
+#
+# def test_6by7_ryser():
+#     matrix = np.arange(1, 43, dtype=np.double).reshape(6, 7)
+#     assert abs((permanent.ryser(matrix) - 117681979920) / 117681979920) <= REL_ERROR
+#
+#
+# def test_ones_comb():
+#     matrix = np.ones((10, 10), dtype=np.double)
+#     assert (
+#         abs((permanent.combinatoric(matrix) - factorial(10)) / factorial(10))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_ones_ryser():
+#     matrix = np.ones((10, 10), dtype=np.double)
+#     assert (
+#         abs((permanent.ryser(matrix) - factorial(10)) / factorial(10))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_ones_glynn():
+#     matrix = np.ones((10, 10), dtype=np.double)
+#     assert (
+#         abs((permanent.glynn(matrix) - factorial(10)) / factorial(10))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_ones_comb_big():
+#     matrix = np.ones((12, 12), dtype=np.double)
+#     assert (
+#         abs((permanent.combinatoric(matrix) - factorial(12)) / factorial(12))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_ones_ryser_big():
+#     matrix = np.ones((12, 12), dtype=np.double)
+#     assert (
+#         abs((permanent.ryser(matrix) - factorial(12)) / factorial(12))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_ones_glynn_big():
+#     matrix = np.ones((12, 12), dtype=np.double)
+#     assert (
+#         abs((permanent.glynn(matrix) - factorial(12)) / factorial(12))
+#         <= REL_ERROR
+#     )
+#
+#
+# def test_identity_comb():
+#     matrix = np.identity(10, dtype=np.double)
+#     assert abs((permanent.combinatoric(matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_ryser():
+#     matrix = np.identity(10, dtype=np.double)
+#     assert abs((permanent.ryser(matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_glynn():
+#     matrix = np.identity(10, dtype=np.double)
+#     assert abs((permanent.glynn(matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_ryser_odd():
+#     matrix = np.identity(5, dtype=np.double)
+#     assert abs((permanent.ryser(matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_glynn_odd():
+#     matrix = np.identity(5, dtype=np.double)
+#     assert abs((permanent.glynn(matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_comb_diag():
+#     matrix = np.ones((3, 7), dtype=np.double)
+#     diag_matrix = np.diag(np.diag(matrix))
+#     assert abs((permanent.combinatoric(diag_matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_ryser_diag():
+#     matrix = np.ones((3, 7), dtype=np.double)
+#     diag_matrix = np.diag(np.diag(matrix))
+#     assert abs((permanent.ryser(diag_matrix) - 1) / 1) <= REL_ERROR
+#
+#
+# def test_identity_glynn_diag():
+#     matrix = np.ones((3, 7), dtype=np.double)
+#     diag_matrix = np.diag(np.diag(matrix))
+#     assert abs((permanent.glynn(diag_matrix) - 1) / 1) <= REL_ERROR
diff --git a/tools/tuning.py b/tools/generate_tuning_header.py
similarity index 68%
rename from tools/tuning.py
rename to tools/generate_tuning_header.py
index 6b3d780..aa82556 100644
--- a/tools/tuning.py
+++ b/tools/generate_tuning_header.py
@@ -1,3 +1,5 @@
+import sys
+
 import numpy as np
 
 import pandas as pd
@@ -6,9 +8,9 @@
 from sklearn.metrics import accuracy_score
 
 
-CSV_FILE = "src/tuning.csv"
+CSV_FILE = sys.argv[1]
 
-HEADER_FILE = "src/tuning.h"
+HEADER_FILE = sys.argv[2]
 
 
 # Read output from tuning program
@@ -79,17 +81,37 @@
 try:
     with open(HEADER_FILE, "w") as file_ptr:
         file_ptr.write(
-            f"""#ifndef TUNING_H_
-#define TUNING_H_
+            f"""/* Copyright 2024 QC-Devs (GPLv3) */
+
+#if !defined(permanent_tuning_h_)
+#define permanent_tuning_h_
+
+namespace permanent {{
+
+template<typename Type, typename IntType = void>
+struct _tuning_params_t
+{{
+  static constexpr double PARAM_1 = {param_1:+16.9e};
+  static constexpr double PARAM_2 = {param_2:+16.9e};
+  static constexpr double PARAM_3 = {param_3:+16.9e};
+  static constexpr double PARAM_4 = {param_4:+16.9e};
+}};
+
+template<typename Type, typename IntType = void>
+static constexpr double PARAM_1 = _tuning_params_t<Type, IntType>::PARAM_1;
+
+template<typename Type, typename IntType = void>
+static constexpr double PARAM_2 = _tuning_params_t<Type, IntType>::PARAM_2;
 
+template<typename Type, typename IntType = void>
+static constexpr double PARAM_3 = _tuning_params_t<Type, IntType>::PARAM_3;
 
- constexpr double PARAM_1 = {param_1:.9f};
- constexpr double PARAM_2 = {param_2:.9f};
- constexpr double PARAM_3 = {param_3:.9f};
- constexpr double PARAM_4 = {param_4:.9f};
+template<typename Type, typename IntType = void>
+static constexpr double PARAM_4 = _tuning_params_t<Type, IntType>::PARAM_4;
 
+}}  // namespace permanent
 
-#endif  // TUNING_H_
+#endif  // permanent_tuning_h_
 """
         )
 
diff --git a/tools/include_dirs.py b/tools/include_dirs.py
deleted file mode 100644
index ec9c720..0000000
--- a/tools/include_dirs.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import sysconfig
-
-import numpy
-
-
-print(f"-I{sysconfig.get_paths()['include']} -I{numpy.get_include()}")