LinkedEarth · CommonClimate · Jan 28, 2025 · Jan 9, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/environment.yml b/environment.yml
@@ -20,4 +20,5 @@ dependencies:
   - pytest
   - pip:
     - pyhht
+    - dill
     - '-e .'
diff --git a/pyleoclim/core/coherences.py b/pyleoclim/core/coherences.py
@@ -7,6 +7,13 @@
 from ..utils import plotting
 from ..utils import wavelet as waveutils
 from ..core.scalograms import Scalogram, MultipleScalogram
+import dill
+import multiprocessing
+
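+# Try to route multiprocessing serialization through `dill` (NOTE(review): confirm that assigning `reduce`/`rebuild` on the context has any effect)
+multiprocessing.set_start_method("spawn", force=True)  # Select the "spawn" start method; force=True replaces any method already set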
+multiprocessing.get_context("spawn").reduce = dill.dumps
+multiprocessing.get_context("spawn").rebuild = dill.loads
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -20,6 +27,27 @@
 from scipy.stats.mstats import mquantiles
 import warnings
 
+from concurrent.futures import ProcessPoolExecutor #parallel processing library
+from contextlib import contextmanager
+
+def _run_wavelet_coherence(args):
+    """Unpack one ``(series1, series2, method, settings)`` task tuple and return ``series1.wavelet_coherence(series2, method=..., settings=...)``."""
+    surr1_series, surr2_series, wave_method, wave_args = args  # one tuple per task: executor.map passes a single argument
+    return surr1_series.wavelet_coherence(
+        surr2_series, method=wave_method, settings=wave_args
+    )
+
+def _run_global_coherence(args):
+    """Unpack one ``(series1, series2, wavelet_kwargs)`` task tuple and return the ``global_coh`` attribute of ``series1.global_coherence(series2, ...)``."""
+    surr_series1, surr_series2, wavelet_kwargs = args  # one tuple per task: executor.map passes a single argument
+    return surr_series1.global_coherence(surr_series2, wavelet_kwargs=wavelet_kwargs).global_coh
+
+@contextmanager
+def _get_process_pool():  # context manager yielding a ProcessPoolExecutor built on the "spawn" multiprocessing context
+    ctx = multiprocessing.get_context("spawn")
+    with ProcessPoolExecutor(mp_context=ctx) as executor:
+        yield executor  # the enclosing ``with`` shuts the pool down once the caller's block exits
+
 class Coherence:
     '''Coherence object, meant to receive the WTC and XWT part of Series.wavelet_coherence()
 
@@ -689,6 +717,7 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting
             coh.signif_test(method='phaseran').plot() 
         '''
         from ..core.surrogateseries import SurrogateSeries
+        from ..core.series import Series  # Required by the multiprocessing pickling step -- do not remove.
 
         if number == 0:
             return self
@@ -700,16 +729,32 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting
         surr2 = SurrogateSeries(method=method,number=number, seed=seed)
         surr2.from_series(self.timeseries2)
 
-        # adjust time axis
-
-        wtcs, xwts = [], []
+        # Prepare arguments for parallel processing
+        args = [
+            (
+                surr1.series_list[i],
+                surr2.series_list[i],
+                self.wave_method,
+                self.wave_args,
+            )
+            for i in range(number)
+        ]
+
+
+        # Perform wavelet coherence calculations in parallel
+        with _get_process_pool() as executor:
+            results = list(
+                tqdm(
+                    executor.map(_run_wavelet_coherence, args),
+                    total=number,
+                    desc="Performing wavelet coherence on surrogate pairs",
+                    disable=mute_pbar,
+                )
+            )
 
-        for i in tqdm(range(number), desc='Performing wavelet coherence on surrogate pairs', total=number, disable=mute_pbar):
-            coh_tmp = surr1.series_list[i].wavelet_coherence(surr2.series_list[i],
-                                                             method  = self.wave_method,
-                                                             settings = self.wave_args)
-            wtcs.append(coh_tmp.wtc)
-            xwts.append(coh_tmp.xwt)
+        # Split results into wtcs and xwts
+        wtcs = [result.wtc for result in results]
+        xwts = [result.xwt for result in results]
 
         wtcs = np.array(wtcs)
         xwts = np.array(xwts)
@@ -976,13 +1021,29 @@ def signif_test(self,method='ar1sim',number=200,qs=[.95]):
             'method':self.coh.wave_method,
         }
 
-        for i in range(number):
-            surr_series1 = surr1.series_list[i]
-            surr_series2 = surr2.series_list[i]
-            surr_coh = surr_series1.global_coherence(surr_series2,wavelet_kwargs=wavelet_kwargs)
-            coh_array[i,:] = surr_coh.global_coh
-
-        quantiles = mquantiles(coh_array,qs,axis=0)
+        # Prepare arguments for parallel processing
+        args = [
+            (surr1.series_list[i], surr2.series_list[i], wavelet_kwargs)
+            for i in range(number)
+        ]
+
+        # Run the global coherence computations in a process pool (see _get_process_pool)
+        with _get_process_pool() as executor:
+            results = list(
+                tqdm(
+                    executor.map(_run_global_coherence, args),
+                    total=number,
+                    desc="Computing global coherence for surrogate pairs",
+                    disable=False,
+                )
+            )
+
+        # Collect results into coh_array
+        for i, result in enumerate(results):
+            coh_array[i, :] = result
+
+        # Compute quantiles
+        quantiles = mquantiles(coh_array, qs, axis=0)
         new.signif_qs = quantiles.data
         new.signif_method = method
         new.qs = qs

diff --git a/pyleoclim/core/multipleseries.py b/pyleoclim/core/multipleseries.py
@@ -13,6 +13,8 @@
 from ..core.multivardecomp import MultivariateDecomp
 from ..core.resolutions import MultipleResolution
 
+from concurrent.futures import ProcessPoolExecutor #parallel processing library
+
 import warnings
 import numpy as np
 from copy import deepcopy
@@ -27,6 +29,62 @@
 from scipy import stats
 from statsmodels.multivariate.pca import PCA
 
+import dill
+import multiprocessing
+
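+# Try to route multiprocessing serialization through `dill` (NOTE(review): confirm that assigning `reduce`/`rebuild` on the context has any effect)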
+multiprocessing.set_start_method("spawn", force=True)  
+multiprocessing.get_context("spawn").reduce = dill.dumps
+multiprocessing.get_context("spawn").rebuild = dill.loads
+
+from contextlib import contextmanager
+
+@contextmanager
+def _get_process_pool():  # yields a "spawn"-context ProcessPoolExecutor; NOTE(review): duplicates the helper in pyleoclim/core/coherences.py
+    ctx = multiprocessing.get_context("spawn")
+    with ProcessPoolExecutor(mp_context=ctx) as executor:
+        yield executor  # the enclosing ``with`` shuts the pool down once the caller's block exits
+
+
+def _run_parallel_spectral(args):
+    """Unpack one task tuple and return ``s.spectral(...)``, passing ``scalogram_list.scalogram_list[idx]`` as ``scalogram`` when an entry exists for ``idx``."""
+    s, idx, scalogram_list, method, settings, freq, freq_kwargs, label, verbose = args  # one tuple per task: executor.map passes a single argument
+
+    # Reuse a precomputed scalogram only when a list was supplied and it has an entry at position idx
+    if scalogram_list and idx < len(scalogram_list.scalogram_list):
+        return s.spectral(
+            method=method,
+            settings=settings,
+            freq=freq,
+            freq_kwargs=freq_kwargs,
+            label=label,
+            verbose=verbose,
+            scalogram=scalogram_list.scalogram_list[idx],
+        )
+
+    # Otherwise call spectral without the scalogram keyword
+    return s.spectral(
+        method=method,
+        settings=settings,
+        freq=freq,
+        freq_kwargs=freq_kwargs,
+        label=label,
+        verbose=verbose,
+    )
+
+def _run_parallel_wavelet(args):
+    """Unpack one ``(s, method, settings, freq, freq_kwargs, verbose)`` task tuple and return ``s.wavelet(...)`` called with those keyword arguments."""
+    s, method, settings, freq, freq_kwargs, verbose = args  # one tuple per task: executor.map passes a single argument
+
+    # Forward every unpacked option to the series' own wavelet method
+    return s.wavelet(
+        method=method,
+        settings=settings,
+        freq=freq,
+        freq_kwargs=freq_kwargs,
+        verbose=verbose,
+    )
+
 class MultipleSeries:
     '''MultipleSeries object.
 
@@ -1341,36 +1399,35 @@ def spectral(self, method='lomb_scargle', freq=None, settings=None, mute_pbar=Fa
             ms_psd.plot()
 
         '''
+
+        # main function
         settings = {} if settings is None else settings.copy()
-
-        psd_list = []
+        psd_list =[]
+
         if method in ['wwz','cwt'] and scalogram_list:
             scalogram_list_len = len(scalogram_list.scalogram_list)
             series_len = len(self.series_list)
 
-            #In the case where the scalogram list and series list are the same we can re-use scalograms in a one to one fashion
-            #OR if the scalogram list is longer than the series list we use as many scalograms from the scalogram list as we need
-            if scalogram_list_len >= series_len:
-                for idx, s in enumerate(tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar)):
-                    psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose,scalogram = scalogram_list.scalogram_list[idx])
-                    psd_list.append(psd_tmp)
-            #If the scalogram list isn't as long as the series list, we re-use all the scalograms we can and then calculate the rest
-            elif scalogram_list_len < series_len:
-                for idx, s in enumerate(tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar)):
-                    if idx < scalogram_list_len:
-                        psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose,scalogram = scalogram_list.scalogram_list[idx])
-                        psd_list.append(psd_tmp)
-                    else:
-                        psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose)
-                        psd_list.append(psd_tmp)
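+            # Prepare arguments for parallel execution. NOTE(review): when the scalogram list is shorter than the series list, None is passed below, so no scalogram is reused; the removed loop reused the ones available -- confirm this is intended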
+            args = [
+                (s, idx, scalogram_list if scalogram_list_len >= series_len else None, method, settings, freq, freq_kwargs, label, verbose)
+                for idx, s in enumerate(self.series_list)
+                ]
         else:
-            for s in tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar):
-                psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose)
-                psd_list.append(psd_tmp)
-
-        psds = MultiplePSD(psd_list=psd_list)
+            args = [
+                (s, idx, None, method, settings, freq, freq_kwargs, label, verbose)
+                for idx, s in enumerate(self.series_list)
+                ]
+
+
+        # Parallel processing with ProcessPoolExecutor
+        with _get_process_pool() as executor:
+            psd_list = list(tqdm(executor.map(_run_parallel_spectral, args), 
+                         total=len(args), 
+                         desc='Performing spectral analysis on individual series', 
+                         position=0, leave=True, disable=mute_pbar))
 
-        return psds
+        return MultiplePSD(psd_list=psd_list)
 
     def wavelet(self, method='cwt', settings={}, freq=None, freq_kwargs=None, verbose=False, mute_pbar=False):
         '''Wavelet analysis
@@ -1452,16 +1509,29 @@ def wavelet(self, method='cwt', settings={}, freq=None, freq_kwargs=None, verbos
             wav = ms.wavelet(method='wwz')
 
         '''
+
         settings = {} if settings is None else settings.copy()
-
-        scal_list = []
-        for s in tqdm(self.series_list, desc='Performing wavelet analysis on individual series', position=0, leave=True, disable=mute_pbar):
-            scal_tmp = s.wavelet(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, verbose=verbose)
-            scal_list.append(scal_tmp)
-
-        scals = MultipleScalogram(scalogram_list=scal_list)
-
-        return scals
+
+        # Prepare arguments for parallel execution
+        args = [
+            (s, method, settings, freq, freq_kwargs, verbose)
+            for s in self.series_list
+        ]
+
+        # Parallel processing of the wavelet functionality
+        with _get_process_pool() as executor:
+            scal_list = list(
+                tqdm(
+                    executor.map(_run_parallel_wavelet, args),
+                    total=len(args),
+                    desc='Performing wavelet analysis on individual series',
+                    position=0,
+                    leave=True,
+                    disable=mute_pbar,
+                )
+            )
+
+        return MultipleScalogram(scalogram_list=scal_list)
 
     def plot(self, figsize=[10, 4],
              marker=None, markersize=None,

diff --git a/pyleoclim/core/psds.py b/pyleoclim/core/psds.py
@@ -1517,6 +1517,7 @@ def plot_traces(self, figsize=[10, 4], in_loglog=True, in_period=True, xlabel=No
         --------
 
         .. jupyter-execute::
+
             nn = 30 # number of noise realizations
             nt = 500 # timeseries length
             psds = []

diff --git a/setup.py b/setup.py
@@ -45,6 +45,7 @@ def read(fname):
         "beautifulsoup4",
         "scipy",
         "requests",
+        "dill",
     ],
-    python_requires=">=3.9",
+    python_requires=">=3.11",
 )
-Original file line number
+Diff line change
@@ Expand Up / @@ -20,4 +20,5 @@ dependencies: @@
       - pytest
       - pip:
         - pyhht
+        - dill
         - '-e .'