Skip to content

Commit fa8f3d4

Browse files
committed
Merge branch 'release/2.1.1'
2 parents c5887ba + f877fee commit fa8f3d4

File tree

9 files changed

+272
-25
lines changed

9 files changed

+272
-25
lines changed

brainbox/io/spikeglx.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
import logging
2+
from pathlib import Path
13
import time
4+
25
import numpy as np
3-
# (Previously required `os.path` to get file info before memmapping)
4-
# import os.path as op
6+
57
from ibllib.io import spikeglx
68

9+
_logger = logging.getLogger('ibllib')
10+
711

812
def extract_waveforms(ephys_file, ts, ch, t=2.0, sr=30000, n_ch_probe=385, car=True):
913
"""
@@ -100,3 +104,52 @@ def extract_waveforms(ephys_file, ts, ch, t=2.0, sr=30000, n_ch_probe=385, car=T
100104
print('Done. ({})'.format(time.ctime()))
101105

102106
return waveforms
107+
108+
109+
def stream(pid, t0, nsecs=1, one=None, cache_folder=None, dsets=None, typ='ap'):
    """
    Stream a short snippet of compressed raw ephys data for one probe, using a local cache.

    NB: returned Reader object must be closed after use
    :param pid: Probe UUID
    :param t0: time of the first sample; it is floored to a multiple of the chunk duration
    :param nsecs: duration of the streamed data, 10 at most
    :param one: An instance of ONE (required)
    :param cache_folder: folder in which streamed samples are looked up and stored. Defaults to
     the `cache/<typ>` sub-folder of the ONE cache directory
    :param dsets: currently unused
    :param typ: 'ap' or 'lf'
    :return: sr, t0: the spikeglx Reader on the snippet and the chunk-aligned start time
    :raises ValueError: if nsecs is greater than 10
    """
    CHUNK_DURATION_SECS = 1  # the mtscomp chunk duration. Right now it's a constant
    MAX_DURATION_SECS = 10
    if nsecs > MAX_DURATION_SECS:
        # the exception has to be raised, merely instantiating it is a no-op
        raise ValueError(
            f'Streamer works only with {MAX_DURATION_SECS} or less seconds, got nsecs={nsecs}')
    assert one, 'An instance of ONE is required'
    assert typ in ['lf', 'ap'], "typ must be 'ap' or 'lf'"
    # align the requested start time on the beginning of the chunk that contains it
    t0 = np.floor(t0 / CHUNK_DURATION_SECS) * CHUNK_DURATION_SECS
    if cache_folder is None:
        samples_folder = Path(one.alyx._par.CACHE_DIR).joinpath('cache', typ)
    else:
        # honour the user-provided folder, otherwise samples_folder would be undefined below
        samples_folder = Path(cache_folder)
    sample_file_name = Path(f"{pid}_{str(int(t0)).zfill(5)}.meta")

    # the meta file is used as the marker for an already cached snippet
    if samples_folder.joinpath(sample_file_name).exists():
        _logger.info(f'loading {sample_file_name} from cache')
        sr = spikeglx.Reader(samples_folder.joinpath(sample_file_name).with_suffix('.bin'),
                             open=True)
        return sr, t0

    eid, pname = one.pid2eid(pid)
    # select the datasets of the requested type (the filters used to be hard-coded to 'ap')
    cbin_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.*bin', details=True)
    ch_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.ch', details=True)
    meta_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.meta', details=True)
    ch_file = one._download_datasets(ch_rec)[0]
    # the meta file is downloaded for its side effect only, the returned path is not used here
    one._download_datasets(meta_rec)[0]

    first_chunk = int(t0 / CHUNK_DURATION_SECS)
    last_chunk = int((t0 + nsecs) / CHUNK_DURATION_SECS) - 1

    samples_folder.mkdir(exist_ok=True, parents=True)
    sr = spikeglx.download_raw_partial(
        one=one,
        url_cbin=one.record2url(cbin_rec)[0],
        url_ch=ch_file,
        first_chunk=first_chunk,
        last_chunk=last_chunk,
        cache_dir=samples_folder)

    return sr, t0

ibllib/dsp/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .fourier import fscale, freduce, fexpand, lp, hp, bp, fshift, dephas, fit_phase
22
from .utils import rms, WindowGenerator, rises, falls, fronts, fcn_cosine
3+
from .voltage import destripe

ibllib/ephys/ephysqc.py

Lines changed: 136 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
import numpy as np
99
import pandas as pd
1010
from scipy import signal
11+
from tqdm import tqdm
1112
import one.alf.io as alfio
1213
from iblutil.util import Bunch
1314

1415
from brainbox.metrics.single_units import spike_sorting_metrics
16+
from brainbox.io.spikeglx import stream as sglx_streamer
1517
from ibllib.ephys import sync_probes
1618
from ibllib.io import spikeglx
1719
import ibllib.dsp as dsp
20+
from ibllib.qc import base
1821
from ibllib.io.extractors import ephys_fpga, training_wheel
1922
from ibllib.misc import print_progress
2023
from phylib.io import model
@@ -25,18 +28,141 @@
2528
RMS_WIN_LENGTH_SECS = 3
2629
WELCH_WIN_LENGTH_SAMPLES = 1024
2730
NCH_WAVEFORMS = 32 # number of channels to be saved in templates.waveforms and channels.waveforms
31+
BATCHES_SPACING = 300
32+
TMIN = 40
33+
SAMPLE_LENGTH = 1
2834

2935

30-
def rmsmap(fbin):
36+
class EphysQC(base.QC):
    """
    A class for computing Ephys QC metrics.

    :param probe_id: An existing and registered probe insertion ID.
    :param one: An ONE instance pointing to the database the probe_id is registered with. Optional, will instantiate
     default database if not given.
    :param stream: bool, whether to stream the samples of the .ap data if not locally available. Default is True.
    """

    def __init__(self, probe_id, **kwargs):
        # `stream` belongs to this class only: take it out of kwargs *before* they are forwarded,
        # otherwise the base class constructor receives a keyword it was not meant to get
        stream = kwargs.pop('stream', True)
        super().__init__(probe_id, endpoint='insertions', **kwargs)
        self.pid = probe_id
        self.stream = stream
        keys = ('ap', 'ap_meta', 'lf', 'lf_meta')
        self.data = Bunch.fromkeys(keys)
        self.metrics = {}
        self.outcome = 'NOT_SET'

    def _ensure_required_data(self):
        """
        Ensures the datasets required for QC are available locally or remotely.

        Sets `self.probe_path` and asserts that it contains at least one meta file and no more
        than one meta file per data type (ap, lf).
        """
        assert self.one is not None, 'ONE instance is required to ensure required data'
        eid, pname = self.one.pid2eid(self.pid)
        self.probe_path = self.one.eid2path(eid).joinpath('raw_ephys_data', pname)
        # Check if there is at least one meta file available
        meta_files = list(self.probe_path.rglob('*.meta'))
        assert len(meta_files) != 0, f'No meta files in {self.probe_path}'
        # Check if there is no more than one meta file per type
        ap_meta = [meta for meta in meta_files if 'ap.meta' in meta.name]
        assert not len(ap_meta) > 1, f'More than one ap.meta file in {self.probe_path}. Remove redundant files to run QC'
        lf_meta = [meta for meta in meta_files if 'lf.meta' in meta.name]
        assert not len(lf_meta) > 1, f'More than one lf.meta file in {self.probe_path}. Remove redundant files to run QC'

    def load_data(self) -> None:
        """
        Load any locally available data.

        For each of 'ap' and 'lf', reads the meta file into `self.data` and, if a matching bin file
        sits next to it, opens a spikeglx Reader on it (None otherwise).
        """
        # First sanity check
        self._ensure_required_data()

        _logger.info('Gathering data for QC')
        # Load metadata and, if locally present, bin file
        for dstype in ['ap', 'lf']:
            # We already checked that there is not more than one meta file per type
            meta_file = next(self.probe_path.rglob(f'*{dstype}.meta'), None)
            if meta_file is None:
                _logger.warning(f'No {dstype}.meta file in {self.probe_path}, skipping QC for {dstype} data.')
            else:
                self.data[f'{dstype}_meta'] = spikeglx.read_meta_data(meta_file)
                bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None)
                # NOTE(review): readers opened here are not closed by this class - confirm who owns them
                self.data[f'{dstype}'] = spikeglx.Reader(bin_file, open=True) if bin_file is not None else None

    def _compute_ap_rms(self):
        """
        Compute the median RMS per channel of the .ap data, before and after destriping.

        Samples of SAMPLE_LENGTH seconds are taken every BATCHES_SPACING seconds starting at TMIN,
        from the local bin file if present, streamed otherwise (when `self.stream` is True).

        :return: array of shape (2, nc - 1) with the raw (row 0) and destriped (row 1) median RMS,
         or None if the data is neither available locally nor streamed.
        """
        rl = self.data.ap_meta.fileTimeSecs
        nc = spikeglx._get_nchannels_from_meta(self.data.ap_meta)
        t0s = np.arange(TMIN, rl - SAMPLE_LENGTH, BATCHES_SPACING)
        if self.data.ap is None and self.stream is not True:
            _logger.warning('Raw .ap data is not available locally. Run with stream=True in order to stream '
                            'data for calculating RMS samples.')
            # nothing was computed: do not let an all-zero placeholder pass for a QC result
            return None
        all_rms = np.zeros((2, nc - 1, t0s.shape[0]))
        if self.data.ap is None:
            # If the ap.bin file is not present locally, stream it
            _logger.warning(f'Streaming .ap data to compute RMS samples for probe {self.pid}')
            for i, t0 in enumerate(tqdm(t0s)):
                # NOTE(review): the streamer documents that the returned reader must be closed
                # after use; it is left open here - confirm and close once the Reader API is checked
                sr, _ = sglx_streamer(self.pid, t0=t0, nsecs=SAMPLE_LENGTH, one=self.one)
                raw = sr[:, :-1].T  # the last channel is left out of the RMS computation
                destripe = dsp.destripe(raw, fs=sr.fs, neuropixel_version=1)
                all_rms[0, :, i] = dsp.rms(raw)
                all_rms[1, :, i] = dsp.rms(destripe)
        else:
            _logger.info(f'Computing RMS samples for .ap data using local data in {self.probe_path}')
            for i, t0 in enumerate(t0s):
                sl = slice(int(t0 * self.data.ap.fs), int((t0 + SAMPLE_LENGTH) * self.data.ap.fs))
                raw = self.data.ap[sl, :-1].T
                destripe = dsp.destripe(raw, fs=self.data.ap.fs, neuropixel_version=1)
                all_rms[0, :, i] = dsp.rms(raw)
                all_rms[1, :, i] = dsp.rms(destripe)
        # Calculate the median RMS across all samples per channel
        return np.median(all_rms, axis=-1)

    def run(self, update: bool = False, overwrite: bool = True, stream: bool = None, **kwargs) -> list:
        """
        Run QC on samples of the .ap file, and on the entire file for .lf data if it is present.

        :param update: bool, whether to update the qc json fields for this probe. Default is False.
        :param overwrite: bool, whether to overwrite locally existing outputs of this function. Default is True.
        :param stream: bool, whether to stream the samples of the .ap data if not locally available. Defaults to value
         set in class init (True if none set).
        :return: A list of QC output files. In case of a complete run that is one file for .ap and three files for .lf.
        """
        # If stream is explicitly given in run, overwrite value from init
        if stream is not None:
            self.stream = stream
        # Load data
        self.load_data()
        qc_files = []
        # If ap meta file present, calculate median RMS per channel before and after destriping
        # TODO: This should go in a separate function once we have a spikeglx.Streamer that behaves like the Reader
        if self.data.ap_meta:
            rms_file = self.probe_path.joinpath("_iblqc_ephysChannels.apRMS.npy")
            if rms_file.exists() and not overwrite:
                _logger.warning(f'File {rms_file} already exists and overwrite=False. Skipping RMS compute.')
                median_rms = np.load(rms_file)
            else:
                median_rms = self._compute_ap_rms()
                if median_rms is not None:
                    np.save(rms_file, median_rms)

            # Only report the file and the metrics when there is an actual result to report
            if median_rms is not None:
                qc_files.append(rms_file)
                for p in [10, 90]:
                    self.metrics[f'apRms_p{p}_raw'] = np.format_float_scientific(
                        np.percentile(median_rms[0, :], p), precision=2)
                    self.metrics[f'apRms_p{p}_proc'] = np.format_float_scientific(
                        np.percentile(median_rms[1, :], p), precision=2)
                if update:
                    self.update_extended_qc(self.metrics)

        # If lf meta and bin file present, run the old qc on LF data
        if self.data.lf_meta and self.data.lf:
            qc_files.extend(extract_rmsmap(self.data.lf, out_folder=self.probe_path, overwrite=overwrite))

        return qc_files
156+
157+
158+
def rmsmap(sglx):
31159
"""
32160
Computes RMS map in time domain and spectra for each channel of Neuropixel probe
33161
34-
:param fbin: binary file in spike glx format (will look for attached metatdata)
35-
:type fbin: str or pathlib.Path
162+
:param sglx: Open spikeglx reader
36163
:return: a dictionary with amplitudes in channeltime space, channelfrequency space, time
37164
and frequency scales
38165
"""
39-
sglx = spikeglx.Reader(fbin, open=True)
40166
rms_win_length_samples = 2 ** np.ceil(np.log2(sglx.fs * RMS_WIN_LENGTH_SECS))
41167
# the window generator will generates window indices
42168
wingen = dsp.WindowGenerator(ns=sglx.ns, nswin=rms_win_length_samples, overlap=0)
@@ -68,33 +194,31 @@ def rmsmap(fbin):
68194
return win
69195

70196

71-
def extract_rmsmap(fbin, out_folder=None, overwrite=False):
197+
def extract_rmsmap(sglx, out_folder=None, overwrite=False):
72198
"""
73199
Wrapper for rmsmap that outputs _ibl_ephysRmsMap and _ibl_ephysSpectra ALF files
74200
75-
:param fbin: binary file in spike glx format (will look for attached metatdata)
201+
:param sglx: Open spikeglx Reader with data for which to compute rmsmap
76202
:param out_folder: folder in which to store output ALF files. Default uses the folder in which
77203
the `fbin` file lives.
78204
:param overwrite: do not re-extract if all ALF files already exist
79205
:param label: string or list of strings that will be appended to the filename before extension
80206
:return: None
81207
"""
82-
_logger.info(f"Computing QC for {fbin}")
83-
sglx = spikeglx.Reader(fbin)
84-
# check if output ALF files exist already:
85208
if out_folder is None:
86-
out_folder = Path(fbin).parent
209+
out_folder = sglx.file_bin.parent
87210
else:
88211
out_folder = Path(out_folder)
212+
_logger.info(f"Computing RMS map for .{sglx.type} data in {out_folder}")
89213
alf_object_time = f'ephysTimeRms{sglx.type.upper()}'
90214
alf_object_freq = f'ephysSpectralDensity{sglx.type.upper()}'
91215
files_time = list(out_folder.glob(f"_iblqc_{alf_object_time}*"))
92216
files_freq = list(out_folder.glob(f"_iblqc_{alf_object_freq}*"))
93217
if (len(files_time) == 2 == len(files_freq)) and not overwrite:
94-
_logger.warning(f'{fbin.name} QC already exists, skipping. Use overwrite option.')
218+
_logger.warning(f'RMS map already exists for .{sglx.type} data in {out_folder}, skipping. Use overwrite option.')
95219
return files_time + files_freq
96220
# crunch numbers
97-
rms = rmsmap(fbin)
221+
rms = rmsmap(sglx)
98222
# output ALF files, single precision with the optional label as suffix before extension
99223
if not out_folder.exists():
100224
out_folder.mkdir()

ibllib/io/spikeglx.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,7 @@ def get_sync_map(folder_ephys):
691691
return _sync_map_from_hardware_config(hc)
692692

693693

694-
def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None):
694+
def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None, cache_dir=None):
695695
"""
696696
TODO Document
697697
:param url_cbin:
@@ -702,10 +702,11 @@ def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None
702702
"""
703703
assert str(url_cbin).endswith('.cbin')
704704
assert str(url_ch).endswith('.ch')
705-
webclient = (one or ONE()).alyx
706-
705+
one = one or ONE()
706+
webclient = one.alyx
707+
cache_dir = cache_dir or webclient.cache_dir
707708
relpath = Path(url_cbin.replace(webclient._par.HTTP_DATA_SERVER, '.')).parents[0]
708-
target_dir = Path(webclient.cache_dir, relpath)
709+
target_dir = Path(cache_dir, relpath)
709710
Path(target_dir).mkdir(parents=True, exist_ok=True)
710711

711712
# First, download the .ch file if necessary
@@ -715,7 +716,7 @@ def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None
715716
ch_file = Path(webclient.download_file(
716717
url_ch, cache_dir=target_dir, clobber=True, return_md5=False))
717718
ch_file = remove_uuid_file(ch_file)
718-
ch_file_stream = ch_file.with_suffix('.stream.ch')
719+
ch_file_stream = target_dir.joinpath(ch_file.name).with_suffix('.stream.ch')
719720

720721
# Load the .ch file.
721722
with open(ch_file, 'r') as f:
@@ -725,6 +726,11 @@ def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None
725726
i0 = cmeta['chunk_bounds'][first_chunk]
726727
ns_stream = cmeta['chunk_bounds'][last_chunk + 1] - i0
727728

729+
# handles the meta file
730+
meta_local_path = ch_file_stream.with_suffix('.meta')
731+
if not meta_local_path.exists():
732+
shutil.copy(ch_file.with_suffix('.meta'), meta_local_path)
733+
728734
# if the cached version happens to be the same as the one on disk, just load it
729735
if ch_file_stream.exists():
730736
with open(ch_file_stream, 'r') as f:
@@ -769,7 +775,5 @@ def download_raw_partial(url_cbin, url_ch, first_chunk=0, last_chunk=0, one=None
769775
cbin_local_path.replace(cbin_local_path_renamed)
770776
assert cbin_local_path_renamed.exists()
771777

772-
shutil.copy(cbin_local_path.with_suffix('.meta'),
773-
cbin_local_path_renamed.with_suffix('.meta'))
774778
reader = Reader(cbin_local_path_renamed)
775779
return reader

ibllib/pipes/ephys_preprocessing.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from ibllib.io.extractors import ephys_fpga, ephys_passive, camera
1818
from ibllib.pipes import tasks
1919
from ibllib.pipes.training_preprocessing import TrainingRegisterRaw as EphysRegisterRaw
20+
from ibllib.pipes.misc import create_alyx_probe_insertions
2021
from ibllib.qc.task_extractors import TaskQCExtractor
2122
from ibllib.qc.task_metrics import TaskQC
2223
from ibllib.qc.camera import run_all_qc as run_camera_qc
@@ -60,7 +61,16 @@ class RawEphysQC(tasks.Task):
6061
input_files = signatures.RAWEPHYSQC
6162

6263
def _run(self, overwrite=False):
    """
    Run the ephys QC of every probe insertion of the session.

    The insertions registered on Alyx for the session are listed; when fewer than two are found,
    insertions are created from the local session data and those are used instead.

    :param overwrite: forwarded to EphysQC.run
    :return: list of the QC output files of all probes
    """
    eid = self.one.path2eid(self.session_path)
    insertions = self.one.alyx.rest('insertions', 'list', session=eid)
    pids = [insertion['id'] for insertion in insertions]
    # Usually there should be two probes, if there are less, check if all probes are registered
    if len(pids) < 2:
        _logger.warning(f"{len(pids)} probes registered for session {eid}, trying to register from local data")
        registered = create_alyx_probe_insertions(self.session_path, one=self.one)
        pids = [insertion['id'] for insertion in registered]
    qc_files = []
    for pid in pids:
        probe_qc = ephysqc.EphysQC(pid, one=self.one)
        qc_files += probe_qc.run(update=True, overwrite=overwrite)
    return qc_files
6575

6676

ibllib/pipes/misc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def create_alyx_probe_insertions(
355355
):
356356
if one is None:
357357
one = ONE(cache_rest=None)
358-
eid = session_path if is_uuid_string(session_path) else one.eid_from_path(session_path)
358+
eid = session_path if is_uuid_string(session_path) else one.path2eid(session_path)
359359
if eid is None:
360360
print("Session not found on Alyx: please create session before creating insertions")
361361
if model is None:

0 commit comments

Comments
 (0)