Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable multiprocessing for spectral, wavelet and cross-wavelet analysis #637

Merged
merged 3 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ dependencies:
- pytest
- pip:
- pyhht
- dill
- '-e .'
93 changes: 77 additions & 16 deletions pyleoclim/core/coherences.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
from ..utils import plotting
from ..utils import wavelet as waveutils
from ..core.scalograms import Scalogram, MultipleScalogram
import dill
import multiprocessing

# Intended to make `dill` the pickler used by multiprocessing.
# NOTE(review): this executes at import time, and force=True presumably replaces
# any start method the host application has already chosen -- confirm that
# changing process-wide state from a library import is intended.
multiprocessing.set_start_method("spawn", force=True) # The start method selected here is "spawn", not "fork"
# NOTE(review): the two assignments below set attributes named `reduce` and
# `rebuild` on the "spawn" context object. It is not evident from this file
# that multiprocessing or ProcessPoolExecutor ever reads them, so dill may not
# actually be used for pickling -- verify against the multiprocessing docs.
multiprocessing.get_context("spawn").reduce = dill.dumps
multiprocessing.get_context("spawn").rebuild = dill.loads

import matplotlib.pyplot as plt
import numpy as np
Expand All @@ -20,6 +27,27 @@
from scipy.stats.mstats import mquantiles
import warnings

from concurrent.futures import ProcessPoolExecutor #parallel processing library
from contextlib import contextmanager

def _run_wavelet_coherence(args):
    """Compute the wavelet coherence of one pair of surrogate series.

    Written as a module-level function taking a single packed argument so
    that it can be handed to ``executor.map``.

    Parameters
    ----------
    args : tuple
        ``(surr1_series, surr2_series, wave_method, wave_args)``. The first
        item must expose ``wavelet_coherence``; the second is the series it
        is compared against; the last two are forwarded as ``method`` and
        ``settings``.

    Returns
    -------
    object
        Whatever ``surr1_series.wavelet_coherence(...)`` returns.
    """
    first, second, method, settings = args
    coherence = first.wavelet_coherence(second, method=method, settings=settings)
    return coherence

def _run_global_coherence(args):
    """Compute the global coherence of one pair of surrogate series.

    Parameters
    ----------
    args : tuple
        ``(surr_series1, surr_series2, wavelet_kwargs)``. The first item must
        expose ``global_coherence``; ``wavelet_kwargs`` is forwarded to it
        under the same keyword.

    Returns
    -------
    object
        The ``global_coh`` attribute of the object returned by
        ``surr_series1.global_coherence(...)``.
    """
    series_a, series_b, kwargs = args
    result = series_a.global_coherence(series_b, wavelet_kwargs=kwargs)
    return result.global_coh

@contextmanager
def _get_process_pool():
    """Yield a ``ProcessPoolExecutor`` built on the "spawn" multiprocessing context.

    The executor is shut down (waiting for pending work) when the ``with``
    block that uses this context manager exits, whether normally or through
    an exception.
    """
    spawn_ctx = multiprocessing.get_context("spawn")
    pool = ProcessPoolExecutor(mp_context=spawn_ctx)
    try:
        yield pool
    finally:
        # Same clean-up the executor performs when used as a context manager.
        pool.shutdown(wait=True)

class Coherence:
'''Coherence object, meant to receive the WTC and XWT part of Series.wavelet_coherence()

Expand Down Expand Up @@ -689,6 +717,7 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting
coh.signif_test(method='phaseran').plot()
'''
from ..core.surrogateseries import SurrogateSeries
from ..core.series import Series #This is necessary for the multiprocessing pickling process!!! DO NOT REMOVE!!!!!

if number == 0:
return self
Expand All @@ -700,16 +729,32 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting
surr2 = SurrogateSeries(method=method,number=number, seed=seed)
surr2.from_series(self.timeseries2)

# adjust time axis

wtcs, xwts = [], []
# Prepare arguments for parallel processing
args = [
(
surr1.series_list[i],
surr2.series_list[i],
self.wave_method,
self.wave_args,
)
for i in range(number)
]


# Perform wavelet coherence calculations in parallel
with _get_process_pool() as executor:
results = list(
tqdm(
executor.map(_run_wavelet_coherence, args),
total=number,
desc="Performing wavelet coherence on surrogate pairs",
disable=mute_pbar,
)
)

for i in tqdm(range(number), desc='Performing wavelet coherence on surrogate pairs', total=number, disable=mute_pbar):
coh_tmp = surr1.series_list[i].wavelet_coherence(surr2.series_list[i],
method = self.wave_method,
settings = self.wave_args)
wtcs.append(coh_tmp.wtc)
xwts.append(coh_tmp.xwt)
# Split results into wtcs and xwts
wtcs = [result.wtc for result in results]
xwts = [result.xwt for result in results]

wtcs = np.array(wtcs)
xwts = np.array(xwts)
Expand Down Expand Up @@ -976,13 +1021,29 @@ def signif_test(self,method='ar1sim',number=200,qs=[.95]):
'method':self.coh.wave_method,
}

for i in range(number):
surr_series1 = surr1.series_list[i]
surr_series2 = surr2.series_list[i]
surr_coh = surr_series1.global_coherence(surr_series2,wavelet_kwargs=wavelet_kwargs)
coh_array[i,:] = surr_coh.global_coh

quantiles = mquantiles(coh_array,qs,axis=0)
# Prepare arguments for parallel processing
args = [
(surr1.series_list[i], surr2.series_list[i], wavelet_kwargs)
for i in range(number)
]

# Run the surrogate pairs through the process pool from _get_process_pool()
with _get_process_pool() as executor:
results = list(
tqdm(
executor.map(_run_global_coherence, args),
total=number,
desc="Computing global coherence for surrogate pairs",
disable=False,
)
)

# Collect results into coh_array
for i, result in enumerate(results):
coh_array[i, :] = result

# Compute quantiles
quantiles = mquantiles(coh_array, qs, axis=0)
new.signif_qs = quantiles.data
new.signif_method = method
new.qs = qs
Expand Down
134 changes: 102 additions & 32 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from ..core.multivardecomp import MultivariateDecomp
from ..core.resolutions import MultipleResolution

from concurrent.futures import ProcessPoolExecutor #parallel processing library

import warnings
import numpy as np
from copy import deepcopy
Expand All @@ -27,6 +29,62 @@
from scipy import stats
from statsmodels.multivariate.pca import PCA

import dill
import multiprocessing

# Intended to make `dill` the pickler used by multiprocessing.
# NOTE(review): this executes at import time, and force=True presumably replaces
# any start method the host application has already chosen -- confirm that
# changing process-wide state from a library import is intended.
multiprocessing.set_start_method("spawn", force=True)
# NOTE(review): the two assignments below set attributes named `reduce` and
# `rebuild` on the "spawn" context object. It is not evident from this file
# that multiprocessing or ProcessPoolExecutor ever reads them, so dill may not
# actually be used for pickling -- verify against the multiprocessing docs.
multiprocessing.get_context("spawn").reduce = dill.dumps
multiprocessing.get_context("spawn").rebuild = dill.loads

from contextlib import contextmanager

@contextmanager
def _get_process_pool():
    """Yield a ``ProcessPoolExecutor`` built on the "spawn" multiprocessing context.

    The executor is shut down (waiting for pending work) when the ``with``
    block that uses this context manager exits, whether normally or through
    an exception.
    """
    spawn_ctx = multiprocessing.get_context("spawn")
    pool = ProcessPoolExecutor(mp_context=spawn_ctx)
    try:
        yield pool
    finally:
        # Same clean-up the executor performs when used as a context manager.
        pool.shutdown(wait=True)


def _run_parallel_spectral(args):
    """Run ``spectral`` on one series; usable as a process-pool worker.

    Parameters
    ----------
    args : tuple
        ``(s, idx, scalogram_list, method, settings, freq, freq_kwargs,
        label, verbose)``. ``s`` must expose ``spectral``. ``scalogram_list``
        may be ``None``; otherwise it must expose a ``scalogram_list``
        sequence, and entry ``idx`` of that sequence is reused when it exists.

    Returns
    -------
    object
        Whatever ``s.spectral(...)`` returns.
    """
    (series, idx, scalograms, method, settings,
     freq, freq_kwargs, label, verbose) = args

    call_kwargs = dict(
        method=method,
        settings=settings,
        freq=freq,
        freq_kwargs=freq_kwargs,
        label=label,
        verbose=verbose,
    )

    # Reuse a precomputed scalogram only when one is available at this index;
    # otherwise the series computes everything it needs itself.
    if scalograms and idx < len(scalograms.scalogram_list):
        call_kwargs['scalogram'] = scalograms.scalogram_list[idx]

    return series.spectral(**call_kwargs)

def _run_parallel_wavelet(args):
    """Run ``wavelet`` on one series; usable as a process-pool worker.

    Parameters
    ----------
    args : tuple
        ``(s, method, settings, freq, freq_kwargs, verbose)``. ``s`` must
        expose ``wavelet``; the remaining items are forwarded to it as
        keyword arguments of the same names.

    Returns
    -------
    object
        Whatever ``s.wavelet(...)`` returns.
    """
    series, *forwarded = args
    option_names = ('method', 'settings', 'freq', 'freq_kwargs', 'verbose')
    if len(forwarded) != len(option_names):
        # Keep the failure mode of a fixed-length tuple unpack.
        raise ValueError(
            f"expected {len(option_names) + 1} values in args, got {len(args)}"
        )
    return series.wavelet(**dict(zip(option_names, forwarded)))

class MultipleSeries:
'''MultipleSeries object.

Expand Down Expand Up @@ -1341,36 +1399,35 @@ def spectral(self, method='lomb_scargle', freq=None, settings=None, mute_pbar=Fa
ms_psd.plot()

'''

# main function
settings = {} if settings is None else settings.copy()

psd_list = []
psd_list =[]

if method in ['wwz','cwt'] and scalogram_list:
scalogram_list_len = len(scalogram_list.scalogram_list)
series_len = len(self.series_list)

#In the case where the scalogram list and series list are the same we can re-use scalograms in a one to one fashion
#OR if the scalogram list is longer than the series list we use as many scalograms from the scalogram list as we need
if scalogram_list_len >= series_len:
for idx, s in enumerate(tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar)):
psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose,scalogram = scalogram_list.scalogram_list[idx])
psd_list.append(psd_tmp)
#If the scalogram list isn't as long as the series list, we re-use all the scalograms we can and then calculate the rest
elif scalogram_list_len < series_len:
for idx, s in enumerate(tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar)):
if idx < scalogram_list_len:
psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose,scalogram = scalogram_list.scalogram_list[idx])
psd_list.append(psd_tmp)
else:
psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose)
psd_list.append(psd_tmp)
# Prepare arguments for parallel execution
args = [
(s, idx, scalogram_list if scalogram_list_len >= series_len else None, method, settings, freq, freq_kwargs, label, verbose)
for idx, s in enumerate(self.series_list)
]
else:
for s in tqdm(self.series_list, desc='Performing spectral analysis on individual series', position=0, leave=True, disable=mute_pbar):
psd_tmp = s.spectral(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, label=label, verbose=verbose)
psd_list.append(psd_tmp)

psds = MultiplePSD(psd_list=psd_list)
args = [
(s, idx, None, method, settings, freq, freq_kwargs, label, verbose)
for idx, s in enumerate(self.series_list)
]


# Parallel processing with ProcessPoolExecutor
with _get_process_pool() as executor:
psd_list = list(tqdm(executor.map(_run_parallel_spectral, args),
total=len(args),
desc='Performing spectral analysis on individual series',
position=0, leave=True, disable=mute_pbar))

return psds
return MultiplePSD(psd_list=psd_list)

def wavelet(self, method='cwt', settings={}, freq=None, freq_kwargs=None, verbose=False, mute_pbar=False):
'''Wavelet analysis
Expand Down Expand Up @@ -1452,16 +1509,29 @@ def wavelet(self, method='cwt', settings={}, freq=None, freq_kwargs=None, verbos
wav = ms.wavelet(method='wwz')

'''

settings = {} if settings is None else settings.copy()

scal_list = []
for s in tqdm(self.series_list, desc='Performing wavelet analysis on individual series', position=0, leave=True, disable=mute_pbar):
scal_tmp = s.wavelet(method=method, settings=settings, freq=freq, freq_kwargs=freq_kwargs, verbose=verbose)
scal_list.append(scal_tmp)

scals = MultipleScalogram(scalogram_list=scal_list)

return scals

# Prepare arguments for parallel execution
args = [
(s, method, settings, freq, freq_kwargs, verbose)
for s in self.series_list
]

# Parallel processing of the wavelet functionality
with _get_process_pool() as executor:
scal_list = list(
tqdm(
executor.map(_run_parallel_wavelet, args),
total=len(args),
desc='Performing wavelet analysis on individual series',
position=0,
leave=True,
disable=mute_pbar,
)
)

return MultipleScalogram(scalogram_list=scal_list)

def plot(self, figsize=[10, 4],
marker=None, markersize=None,
Expand Down
1 change: 1 addition & 0 deletions pyleoclim/core/psds.py
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,7 @@ def plot_traces(self, figsize=[10, 4], in_loglog=True, in_period=True, xlabel=No
--------

.. jupyter-execute::

nn = 30 # number of noise realizations
nt = 500 # timeseries length
psds = []
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def read(fname):
"beautifulsoup4",
"scipy",
"requests",
"dill",
],
python_requires=">=3.9",
python_requires=">=3.11",
)
Loading