|
8 | 8 | import numpy as np
|
9 | 9 | import pandas as pd
|
10 | 10 | from scipy import signal
|
| 11 | +from tqdm import tqdm |
11 | 12 | import one.alf.io as alfio
|
12 | 13 | from iblutil.util import Bunch
|
13 | 14 |
|
14 | 15 | from brainbox.metrics.single_units import spike_sorting_metrics
|
| 16 | +from brainbox.io.spikeglx import stream as sglx_streamer |
15 | 17 | from ibllib.ephys import sync_probes
|
16 | 18 | from ibllib.io import spikeglx
|
17 | 19 | import ibllib.dsp as dsp
|
| 20 | +from ibllib.qc import base |
18 | 21 | from ibllib.io.extractors import ephys_fpga, training_wheel
|
19 | 22 | from ibllib.misc import print_progress
|
20 | 23 | from phylib.io import model
|
|
25 | 28 | RMS_WIN_LENGTH_SECS = 3
|
26 | 29 | WELCH_WIN_LENGTH_SAMPLES = 1024
|
27 | 30 | NCH_WAVEFORMS = 32 # number of channels to be saved in templates.waveforms and channels.waveforms
|
| 31 | +BATCHES_SPACING = 300 |
| 32 | +TMIN = 40 |
| 33 | +SAMPLE_LENGTH = 1 |
28 | 34 |
|
29 | 35 |
|
30 |
| -def rmsmap(fbin): |
| 36 | +class EphysQC(base.QC): |
| 37 | + """ |
| 38 | + A class for computing Ephys QC metrics. |
| 39 | +
|
| 40 | + :param probe_id: An existing and registered probe insertion ID. |
| 41 | + :param one: An ONE instance pointing to the database the probe_id is registered with. Optional, will instantiate |
| 42 | + default database if not given. |
| 43 | + """ |
| 44 | + |
| 45 | + def __init__(self, probe_id, **kwargs): |
| 46 | + super().__init__(probe_id, endpoint='insertions', **kwargs) |
| 47 | + self.pid = probe_id |
| 48 | + self.stream = kwargs.pop('stream', True) |
| 49 | + keys = ('ap', 'ap_meta', 'lf', 'lf_meta') |
| 50 | + self.data = Bunch.fromkeys(keys) |
| 51 | + self.metrics = {} |
| 52 | + self.outcome = 'NOT_SET' |
| 53 | + |
| 54 | + def _ensure_required_data(self): |
| 55 | + """ |
| 56 | + Ensures the datasets required for QC are available locally or remotely. |
| 57 | + """ |
| 58 | + assert self.one is not None, 'ONE instance is required to ensure required data' |
| 59 | + eid, pname = self.one.pid2eid(self.pid) |
| 60 | + self.probe_path = self.one.eid2path(eid).joinpath('raw_ephys_data', pname) |
| 61 | + # Check if there is at least one meta file available |
| 62 | + meta_files = list(self.probe_path.rglob('*.meta')) |
| 63 | + assert len(meta_files) != 0, f'No meta files in {self.probe_path}' |
| 64 | + # Check if there is no more than one meta file per type |
| 65 | + ap_meta = [meta for meta in meta_files if 'ap.meta' in meta.name] |
| 66 | + assert not len(ap_meta) > 1, f'More than one ap.meta file in {self.probe_path}. Remove redundant files to run QC' |
| 67 | + lf_meta = [meta for meta in meta_files if 'lf.meta' in meta.name] |
| 68 | + assert not len(lf_meta) > 1, f'More than one lf.meta file in {self.probe_path}. Remove redundant files to run QC' |
| 69 | + |
| 70 | + def load_data(self) -> None: |
| 71 | + """ |
| 72 | + Load any locally available data. |
| 73 | + """ |
| 74 | + # First sanity check |
| 75 | + self._ensure_required_data() |
| 76 | + |
| 77 | + _logger.info('Gathering data for QC') |
| 78 | + # Load metadata and, if locally present, bin file |
| 79 | + for dstype in ['ap', 'lf']: |
| 80 | + # We already checked that there is not more than one meta file per type |
| 81 | + meta_file = next(self.probe_path.rglob(f'*{dstype}.meta'), None) |
| 82 | + if meta_file is None: |
| 83 | + _logger.warning(f'No {dstype}.meta file in {self.probe_path}, skipping QC for {dstype} data.') |
| 84 | + else: |
| 85 | + self.data[f'{dstype}_meta'] = spikeglx.read_meta_data(meta_file) |
| 86 | + bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None) |
| 87 | + self.data[f'{dstype}'] = spikeglx.Reader(bin_file, open=True) if bin_file is not None else None |
| 88 | + |
| 89 | + def run(self, update: bool = False, overwrite: bool = True, stream: bool = None, **kwargs) -> (str, dict): |
| 90 | + """ |
| 91 | + Run QC on samples of the .ap file, and on the entire file for .lf data if it is present. |
| 92 | +
|
| 93 | + :param update: bool, whether to update the qc json fields for this probe. Default is False. |
| 94 | + :param overwrite: bool, whether to overwrite locally existing outputs of this function. Default is False. |
| 95 | + :param stream: bool, whether to stream the samples of the .ap data if not locally available. Defaults to value |
| 96 | + set in class init (True if none set). |
| 97 | + :return: A list of QC output files. In case of a complete run that is one file for .ap and three files for .lf. |
| 98 | + """ |
| 99 | + # If stream is explicitly given in run, overwrite value from init |
| 100 | + if stream is not None: |
| 101 | + self.stream = stream |
| 102 | + # Load data |
| 103 | + self.load_data() |
| 104 | + qc_files = [] |
| 105 | + # If ap meta file present, calculate median RMS per channel before and after destriping |
| 106 | + # TODO: This should go a a separate function once we have a spikeglx.Streamer that behaves like the Reader |
| 107 | + if self.data.ap_meta: |
| 108 | + rms_file = self.probe_path.joinpath("_iblqc_ephysChannels.apRMS.npy") |
| 109 | + if rms_file.exists() and not overwrite: |
| 110 | + _logger.warning(f'File {rms_file} already exists and overwrite=False. Skipping RMS compute.') |
| 111 | + median_rms = np.load(rms_file) |
| 112 | + else: |
| 113 | + rl = self.data.ap_meta.fileTimeSecs |
| 114 | + nc = spikeglx._get_nchannels_from_meta(self.data.ap_meta) |
| 115 | + t0s = np.arange(TMIN, rl - SAMPLE_LENGTH, BATCHES_SPACING) |
| 116 | + all_rms = np.zeros((2, nc - 1, t0s.shape[0])) |
| 117 | + # If the ap.bin file is not present locally, stream it |
| 118 | + if self.data.ap is None and self.stream is True: |
| 119 | + _logger.warning(f'Streaming .ap data to compute RMS samples for probe {self.pid}') |
| 120 | + for i, t0 in enumerate(tqdm(t0s)): |
| 121 | + sr, _ = sglx_streamer(self.pid, t0=t0, nsecs=1, one=self.one) |
| 122 | + raw = sr[:, :-1].T |
| 123 | + destripe = dsp.destripe(raw, fs=sr.fs, neuropixel_version=1) |
| 124 | + all_rms[0, :, i] = dsp.rms(raw) |
| 125 | + all_rms[1, :, i] = dsp.rms(destripe) |
| 126 | + elif self.data.ap is None and self.stream is not True: |
| 127 | + _logger.warning('Raw .ap data is not available locally. Run with stream=True in order to stream ' |
| 128 | + 'data for calculating RMS samples.') |
| 129 | + else: |
| 130 | + _logger.info(f'Computing RMS samples for .ap data using local data in {self.probe_path}') |
| 131 | + for i, t0 in enumerate(t0s): |
| 132 | + sl = slice(int(t0 * self.data.ap.fs), int((t0 + SAMPLE_LENGTH) * self.data.ap.fs)) |
| 133 | + raw = self.data.ap[sl, :-1].T |
| 134 | + destripe = dsp.destripe(raw, fs=self.data.ap.fs, neuropixel_version=1) |
| 135 | + all_rms[0, :, i] = dsp.rms(raw) |
| 136 | + all_rms[1, :, i] = dsp.rms(destripe) |
| 137 | + # Calculate the median RMS across all samples per channel |
| 138 | + median_rms = np.median(all_rms, axis=-1) |
| 139 | + np.save(rms_file, median_rms) |
| 140 | + qc_files.append(rms_file) |
| 141 | + |
| 142 | + for p in [10, 90]: |
| 143 | + self.metrics[f'apRms_p{p}_raw'] = np.format_float_scientific(np.percentile(median_rms[0, :], p), |
| 144 | + precision=2) |
| 145 | + self.metrics[f'apRms_p{p}_proc'] = np.format_float_scientific(np.percentile(median_rms[1, :], p), |
| 146 | + precision=2) |
| 147 | + if update: |
| 148 | + self.update_extended_qc(self.metrics) |
| 149 | + # self.update(outcome) |
| 150 | + |
| 151 | + # If lf meta and bin file present, run the old qc on LF data |
| 152 | + if self.data.lf_meta and self.data.lf: |
| 153 | + qc_files.extend(extract_rmsmap(self.data.lf, out_folder=self.probe_path, overwrite=overwrite)) |
| 154 | + |
| 155 | + return qc_files |
| 156 | + |
| 157 | + |
| 158 | +def rmsmap(sglx): |
31 | 159 | """
|
32 | 160 | Computes RMS map in time domain and spectra for each channel of Neuropixel probe
|
33 | 161 |
|
34 |
| - :param fbin: binary file in spike glx format (will look for attached metatdata) |
35 |
| - :type fbin: str or pathlib.Path |
| 162 | + :param sglx: Open spikeglx reader |
36 | 163 | :return: a dictionary with amplitudes in channeltime space, channelfrequency space, time
|
37 | 164 | and frequency scales
|
38 | 165 | """
|
39 |
| - sglx = spikeglx.Reader(fbin, open=True) |
40 | 166 | rms_win_length_samples = 2 ** np.ceil(np.log2(sglx.fs * RMS_WIN_LENGTH_SECS))
|
41 | 167 | # the window generator will generates window indices
|
42 | 168 | wingen = dsp.WindowGenerator(ns=sglx.ns, nswin=rms_win_length_samples, overlap=0)
|
@@ -68,33 +194,31 @@ def rmsmap(fbin):
|
68 | 194 | return win
|
69 | 195 |
|
70 | 196 |
|
71 |
| -def extract_rmsmap(fbin, out_folder=None, overwrite=False): |
| 197 | +def extract_rmsmap(sglx, out_folder=None, overwrite=False): |
72 | 198 | """
|
73 | 199 | Wrapper for rmsmap that outputs _ibl_ephysRmsMap and _ibl_ephysSpectra ALF files
|
74 | 200 |
|
75 |
| - :param fbin: binary file in spike glx format (will look for attached metatdata) |
| 201 | + :param sglx: Open spikeglx Reader with data for which to compute rmsmap |
76 | 202 | :param out_folder: folder in which to store output ALF files. Default uses the folder in which
|
77 | 203 | the `fbin` file lives.
|
78 | 204 | :param overwrite: do not re-extract if all ALF files already exist
|
79 | 205 | :param label: string or list of strings that will be appended to the filename before extension
|
80 | 206 | :return: None
|
81 | 207 | """
|
82 |
| - _logger.info(f"Computing QC for {fbin}") |
83 |
| - sglx = spikeglx.Reader(fbin) |
84 |
| - # check if output ALF files exist already: |
85 | 208 | if out_folder is None:
|
86 |
| - out_folder = Path(fbin).parent |
| 209 | + out_folder = sglx.file_bin.parent |
87 | 210 | else:
|
88 | 211 | out_folder = Path(out_folder)
|
| 212 | + _logger.info(f"Computing RMS map for .{sglx.type} data in {out_folder}") |
89 | 213 | alf_object_time = f'ephysTimeRms{sglx.type.upper()}'
|
90 | 214 | alf_object_freq = f'ephysSpectralDensity{sglx.type.upper()}'
|
91 | 215 | files_time = list(out_folder.glob(f"_iblqc_{alf_object_time}*"))
|
92 | 216 | files_freq = list(out_folder.glob(f"_iblqc_{alf_object_freq}*"))
|
93 | 217 | if (len(files_time) == 2 == len(files_freq)) and not overwrite:
|
94 |
| - _logger.warning(f'{fbin.name} QC already exists, skipping. Use overwrite option.') |
| 218 | + _logger.warning(f'RMS map already exists for .{sglx.type} data in {out_folder}, skipping. Use overwrite option.') |
95 | 219 | return files_time + files_freq
|
96 | 220 | # crunch numbers
|
97 |
| - rms = rmsmap(fbin) |
| 221 | + rms = rmsmap(sglx) |
98 | 222 | # output ALF files, single precision with the optional label as suffix before extension
|
99 | 223 | if not out_folder.exists():
|
100 | 224 | out_folder.mkdir()
|
|
0 commit comments