-
Notifications
You must be signed in to change notification settings - Fork 1
First Level Analysis
Mandy Renfro edited this page Apr 9, 2018
·
20 revisions
Unlike the create_ev.ipynb
script, the lvl1.py
script is a simple python file that is executed using a second python "submit" script.
Go to /home/data/madlab/scripts/tools/sample_scripts
to find the sample scripts wmaze_lvl1.py
and wmaze_lvl1_submit.py
.
- Open
wmaze_lvl1.py
- Make sure you see
#!/usr/bin/env python
as the first line; if it's missing, add it! - The next part is important -- it provides information about the current analysis. This is especially important for future you and other researchers in the lab: a description of what you're doing. It is also the perfect place to write special notes so you don't have to reinvent the wheel 37 times.
"""
=============================================================================
wmaze_fMRI: Mandy Thesis -- Fixed before Conditional -- Model 3 Version 1.0.0
=============================================================================
First level workflow for UM GE 750 wmaze task data
- WMAZE Model 3 Version 1.0.0
1 - Normal general linear model (not LSS)
0 - Does not account for the last three volumes (scanner artifacts)
0 - No method to control or account for the last three volumes/trials
- EV Directory (Model3)--- /home/data/madlab/data/mri/wmaze/scanner_behav/WMAZE_001/MRthesis/model3
- python wmaze_lvl1.py -s WMAZE_001
-o /home/data/madlab/data/mri/wmaze/frstlvl/wmaze/model3_1-0-0/lvl1
-w /home/data/madlab/scripts/wmaze/model3/model3_1-0-0/status/lvl1
**Note: DOF file is writing out in numpy hexadecimal format
Example: 0x1.64p+7
print((1 + 6./0x10 + 4./0x100) * 2**7) = 178
"""
- Import all your libraries and tools (most of which will be Nipype)
import os
from nipype.pipeline.engine import Workflow, Node, MapNode
from nipype.interfaces.utility import IdentityInterface
from nipype.interfaces.utility import Function
from nipype.utils.misc import getsource
from nipype.interfaces.io import DataGrabber
from nipype.algorithms.modelgen import SpecifyModel
from nipype.interfaces.fsl.model import Level1Design
from nipype.interfaces.fsl.model import FEATModel
from nipype.interfaces.fsl.model import FILMGLS
from nipype.interfaces.fsl.model import ContrastMgr
from nipype.interfaces.fsl.utils import ImageMaths
from nipype.interfaces.io import DataSink
from nipype.interfaces.utility import Merge
- First you will need to create the subjectinfo() function. With this function, you will create a "Bunch" for each run/set, for each subject. This will open the EV files you created with the previous script and organize the onsets, durations, and amplitudes for each of the EV types -- which will be fed into the Nipype pipeline.
- Below you will see a complex set of condition statements concerning the number of each EV file: if the size does not equal at least 3 (one trial worth of info), it will not try to create an empty file (which would make the pipeline crash).
def subjectinfo(subject_id):
import os
from nipype.interfaces.base import Bunch
from copy import deepcopy
import numpy as np
base_proj_dir = '/home/data/madlab/data/mri/wmaze/scanner_behav'
output = []
for curr_run in range(1,7):
names = []
onsets = []
durations = []
amplitudes = []
data_before_B_corr = np.genfromtxt(base_proj_dir +
'/{0}/MRthesis/model3/EVs/' +
r{1}_before_B_corr.txt'.format(subject_id,
curr_run),
dtype = str)
data_before_B_incorr = np.genfromtxt(base_proj_dir +
'/{0}/MRthesis/model3/EVs/' +
'r{1}_before_B_incorr.txt'.format(subject_id,
curr_run),
dtype = str)
data_all_remain = np.genfromtxt(base_proj_dir +
'/{0}/MRthesis/model3/EVs/' +
r{1}_all_remain.txt'.format(subject_id,
curr_run),
dtype = str)
data_nonresponse = np.genfromtxt(base_proj_dir +
'/{0}/MRthesis/model3/EVs/' +
'r{1}_nonresponse.txt'.format(subject_id,
curr_run),
dtype = str)
sequence = ['all_before_B']
for curr_type in sequence:
corr_array_name = eval('data_{0}_corr'.format(curr_type))
incorr_array_name = eval('data_{0}_incorr'.format(curr_type))
if incorr_array_name.size > 0: #MORE THAN ONE MISTAKE MADE
curr_names = ['{0}_corr'.format(curr_type),
'{0}_incorr'.format(curr_type)]
curr_corr_onsets = map(float, corr_array_name[:,0])
curr_corr_durations = map(float, corr_array_name[:,1])
curr_corr_amplitudes = map(float, corr_array_name[:,2])
if incorr_array_name.size == 3: #ONLY ONE ERROR
curr_incorr_onsets = [float(incorr_array_name[0])]
curr_incorr_durations = [float(incorr_array_name[1])]
curr_incorr_amplitudes = [float(incorr_array_name[2])]
else: #MORE THAN ONE ERROR
curr_incorr_onsets = map(float, incorr_array_name[:,0])
curr_incorr_durations = map(float, incorr_array_name[:,1])
curr_incorr_amplitudes = map(float,incorr_array_name[:,2])
curr_onsets = [curr_corr_onsets, curr_incorr_onsets]
curr_durations = [curr_corr_durations, curr_incorr_durations]
curr_amplitudes = [curr_corr_amplitudes, curr_incorr_amplitudes]
else: #NO MISTAKES WERE MADE
curr_names = ['{0}_corr'.format(curr_type)]
curr_corr_onsets = map(float, corr_array_name[:,0])
curr_corr_durations = map(float, corr_array_name[:,1])
curr_corr_amplitudes = map(float, corr_array_name[:,2])
curr_onsets = [curr_corr_onsets]
curr_durations = [curr_corr_durations]
curr_amplitudes = [curr_corr_amplitudes]
names.append(curr_names)
onsets.append(curr_onsets)
durations.append(curr_durations)
amplitudes.append(curr_amplitudes)
curr_names = ['all_remaining']
curr_corr_onsets = map(float, data_all_remaining[:,0])
curr_corr_durations = map(float, data_all_remaining[:,1])
curr_corr_amplitudes = map(float, data_all_remaining[:,2])
curr_onsets = [curr_corr_onsets]
curr_durations = [curr_corr_durations]
curr_amplitudes = [curr_corr_amplitudes]
names.append(curr_names)
onsets.append(curr_onsets)
durations.append(curr_durations)
amplitudes.append(curr_amplitudes)
if any(isinstance(el, list) for el in names):
names = [el for sublist in names for el in sublist]
if any(isinstance(el, list) for el in onsets):
onsets = [el_o for sublist_o in onsets for el_o in sublist_o]
if any(isinstance(el, list) for el in durations):
durations = [el_d for sublist_d in durations for el_d in sublist_d]
if any(isinstance(el, list) for el in amplitudes):
amplitudes = [el_a for sublist_a in amplitudes for el_a in sublist_a]
output.insert(curr_run,
Bunch(conditions = names,
onsets = deepcopy(onsets),
durations = deepcopy(durations),
amplitudes = deepcopy(amplitudes),
tmod = None,
pmod = None,
regressor_names = None,
regressors = None))
return output
- Another necessary function is get_contrasts() -- this will be used to set the precise contrasts you desire in the statistical analysis.
- The below example shows you one way to flexibly create contrasts when you cannot control the number of trials that fall into each (such as correct vs. incorrect).
def get_contrasts(subject_id, info):
contrasts = []
for i, j in enumerate(info):
curr_run_contrasts = []
cont_all = ['AllVsBase', 'T', j.conditions,
[1. / len(j.conditions)] * len(j.conditions)]
curr_run_contrasts.append(cont_all)
for curr_cond in j.conditions:
curr_cont = [curr_cond, 'T', [curr_cond], [1]]
curr_run_contrasts.append(curr_cont)
if 'before_B_corr' in j.conditions and 'before_B_incorr' in j.conditions:
cont_corr_vs_incorr = ['corr_minus_incorr', 'T',
['before_B_corr','before_B_incorr'],
[1, -1]]
cont_incorr_vs_corr = ['incorr_minus_corr', 'T',
['before_B_corr','before_B_incorr'],
[-1, 1]]
curr_run_contrasts.append(cont_corr_vs_incorr)
curr_run_contrasts.append(cont_incorr_vs_corr)
contrasts.append(curr_run_contrasts)
return contrasts
- The get_subs() function is used to customizing the way the output files will be named:
def get_subs(cons):
subs = []
for run_cons in cons:
run_subs = []
for i, con in enumerate(run_cons):
run_subs.append(('cope%d.'%(i + 1),
'cope%02d_%s.'%(i + 1, con[0])))
run_subs.append(('varcope%d.'%(i + 1),
'varcope%02d_%s.'%(i + 1, con[0])))
run_subs.append(('zstat%d.'%(i + 1),
'zstat%02d_%s.'%(i + 1, con[0])))
run_subs.append(('tstat%d.'%(i + 1),
'tstat%02d_%s.'%(i + 1, con[0])))
subs.append(run_subs)
return subs
- The motion_noise() function extracts motion parameters from the noise files:
def motion_noise(subjinfo, files):
import numpy as np
motion_noise_params = []
motion_noi_par_names = []
if not isinstance(files, list):
files = [files]
if not isinstance(subjinfo, list):
subjinfo = [subjinfo]
for j,i in enumerate(files):
curr_mot_noi_par_names = ['Pitch (rad)', 'Roll (rad)', 'Yaw (rad)',
'Tx (mm)', 'Ty (mm)', 'Tz (mm)',
'Pitch_1d', 'Roll_1d', 'Yaw_1d',
'Tx_1d', 'Ty_1d', 'Tz_1d',
'Norm (mm)', 'LG_1stOrd', 'LG_2ndOrd',
'LG_3rdOrd', 'LG_4thOrd']
a = np.genfromtxt(i)
motion_noise_params.append([[]] * a.shape[1])
if a.shape[1] > 17:
for num_out in range(a.shape[1] - 17):
out_name = 'out_{0}'.format(num_out + 1)
curr_mot_noi_par_names.append(out_name)
for z in range(a.shape[1]):
motion_noise_params[j][z] = a[:, z].tolist()
motion_noi_par_names.append(curr_mot_noi_par_names)
for j,i in enumerate(subjinfo):
if i.regressor_names == None:
i.regressor_names = []
if i.regressors == None:
i.regressors = []
for j3, i3 in enumerate(motion_noise_params[j]):
i.regressor_names.append(motion_noi_par_names[j][j3])
i.regressors.append(i3)
return subjinfo
- The final function grabs the first dimension of an array/matrix
pop_lambda = lambda x : x[0]
- The pipeline is formatted itself as a function -- which will be called later in this script.
- The first thing to do is to give the workflow a proper name (e.g. wmaze_frstlvl_wf):
def firstlevel_wf(subject_id,
sink_directory,
name = 'wmaze_frstlvl_wf'):
frstlvl_wf = Workflow(name = 'frstlvl_wf')
- You will now create a dictionary holding the wildcard to be used in the datasource node:
info = dict(task_mri_files = [['subject_id', 'wmaze']],
motion_noise_files = [['subject_id', 'filter_regressor']])
- Next, you need to make a node which calls subjectinfo(), containing the name, onset, duration, and amplitude info:
subject_info = Node(Function(input_names = ['subject_id'],
output_names = ['output'],
function = subjectinfo),
name = 'subject_info')
subject_info.inputs.ignore_exception = False
subject_info.inputs.subject_id = subject_id
- Create another function node to define the contrasts for the experiment by calling get_contrasts():
getcontrasts = Node(Function(input_names = ['subject_id', 'info'],
output_names = ['contrasts'],
function = get_contrasts),
name = 'getcontrasts')
getcontrasts.inputs.ignore_exception = False
getcontrasts.inputs.subject_id = subject_id
frstlvl_wf.connect(subject_info, 'output',
getcontrasts, 'info')
- Create a function node to substitute names of folders and files created during pipeline by calling get_subs():
getsubs = Node(Function(input_names = ['cons'],
output_names = ['subs'],
function = get_subs),
name = 'getsubs')
getsubs.inputs.ignore_exception = False
getsubs.inputs.subject_id = subject_id
frstlvl_wf.connect(subject_info, 'output',
getsubs, 'info')
frstlvl_wf.connect(getcontrasts, 'contrasts',
getsubs, 'cons')
- Create a datasource node to get the task_mri and motion-noise files.
- In this node, you need to make sure that the file paths are correct for your data
- The base directory should be the path until the task_mri_files and motion_noise_files split from one another.
datasource = Node(DataGrabber(infields = ['subject_id'],
outfields = info.keys()),
name = 'datasource')
datasource.inputs.template = '*'
datasource.inputs.subject_id = subject_id
datasource.inputs.base_directory = os.path.abspath('/home/data/madlab' +
'/data/mri/wmaze/preproc/')
datasource.inputs.field_template = dict(task_mri_files = '%s/func/'
'smoothed_fullspectrum/' +
'_maskfunc2*/*%s*.nii.gz',
motion_noise_files = '%s/noise' +
'/%s*.txt')
datasource.inputs.template_args = info
datasource.inputs.sort_filelist = True
datasource.inputs.ignore_exception = False
datasource.inputs.raise_on_empty = True
- Function node to modify the motion and noise files to be single regressors
motionnoise = Node(Function(input_names = ['subjinfo', 'files'],
output_names = ['subjinfo'],
function = motion_noise),
name = 'motionnoise')
motionnoise.inputs.ignore_exception = False
frstlvl_wf.connect(subject_info, 'output',
motionnoise, 'subjinfo')
frstlvl_wf.connect(datasource, 'motion_noise_files',
motionnoise, 'files')
- Makes a model specification compatible with spm/fsl designers
- Requires subject_info to be received in the form of a Bunch of a list of Bunch
specify_model = Node(SpecifyModel(),
name = 'specify_model')
specify_model.inputs.high_pass_filter_cutoff = -1.0
specify_model.inputs.ignore_exception = False
specify_model.inputs.input_units = 'secs'
specify_model.inputs.time_repetition = 2.0
frstlvl_wf.connect(datasource, 'task_mri_files',
specify_model, 'functional_runs')
frstlvl_wf.connect(motionnoise, 'subjinfo',
specify_model, 'subject_info')
- Basic interface class generates identity mappings:
modelfit_inputspec = Node(IdentityInterface(fields = ['session_info',
'interscan_interval',
'contrasts',
'film_threshold',
'functional_data',
'bases',
'model_serial_corr'],
mandatory_inputs = True),
name = 'modelfit_inputspec')
modelfit_inputspec.inputs.bases = {'dgamma':{'derivs': False}}
modelfit_inputspec.inputs.film_threshold = 0.0
modelfit_inputspec.inputs.interscan_interval = 2.0
modelfit_inputspec.inputs.model_serial_correlations = True
frstlvl_wf.connect(datasource, 'task_mri_files',
modelfit_inputspec, 'functional_data')
frstlvl_wf.connect(getcontrasts, 'contrasts',
modelfit_inputspec, 'contrasts')
frstlvl_wf.connect(specify_model, 'session_info',
modelfit_inputspec, 'session_info')
- Creates the node to generate the files needed for the FEAT model:
level1_design = MapNode(Level1Design(),
iterfield = ['contrasts', 'session_info'],
name = 'level1_design')
level1_design.inputs.ignore_exception = False
frstlvl_wf.connect(modelfit_inputspec, 'interscan_interval',
level1_design, 'interscan_interval')
frstlvl_wf.connect(modelfit_inputspec, 'session_info',
level1_design, 'session_info')
frstlvl_wf.connect(modelfit_inputspec, 'contrasts',
level1_design, 'contrasts')
frstlvl_wf.connect(modelfit_inputspec, 'bases',
level1_design, 'bases')
frstlvl_wf.connect(modelfit_inputspec, 'model_serial_corr',
level1_design, 'model_serial_correlations')
- Create a MapNode to generate a model for each run
generate_model = MapNode(FEATModel(),
iterfield = ['fsf_file', 'ev_files'],
name = 'generate_model')
generate_model.inputs.environ = {'FSLOUTPUTTYPE': 'NIFTI_GZ'}
generate_model.inputs.ignore_exception = False
generate_model.inputs.output_type = 'NIFTI_GZ'
generate_model.inputs.terminal_output = 'stream'
frstlvl_wf.connect(level1_design, 'fsf_files',
generate_model, 'fsf_file')
frstlvl_wf.connect(level1_design, 'ev_files',
generate_model, 'ev_files')
- Create a MapNode to estimate the model using FILMGLS and fit a design matrix to a voxel time-series:
estimate_model = MapNode(FILMGLS(),
iterfield = ['design_file',
'in_file', 'tcon_file'],
name = 'estimate_model')
estimate_model.inputs.environ = {'FSLOUTPUTTYPE': 'NIFTI_GZ'}
estimate_model.inputs.ignore_exception = False
estimate_model.inputs.mask_size = 5
estimate_model.inputs.output_type = 'NIFTI_GZ'
estimate_model.inputs.results_dir = 'results'
estimate_model.inputs.smooth_autocorr = True
estimate_model.inputs.terminal_output = 'stream'
frstlvl_wf.connect(modelfit_inputspec, 'film_threshold',
estimate_model, 'threshold')
frstlvl_wf.connect(modelfit_inputspec, 'functional_data',
estimate_model, 'in_file')
frstlvl_wf.connect(generate_model, 'design_file',
estimate_model, 'design_file')
frstlvl_wf.connect(generate_model, 'con_file',
estimate_model, 'tcon_file')
- Create a merge node to merge the contrasts - necessary for fsl 5.0.7 and greater:
merge_contrasts = MapNode(Merge(2),
iterfield = ['in1'],
name = 'merge_contrasts')
frstlvl_wf.connect(estimate_model, 'zstats',
merge_contrasts, 'in1')
- Create a MapNode to transform the z-score to a p value using pop_lambda:
z2pval = MapNode(ImageMaths(),
# Iterate over 'in_file'
iterfield = ['in_file'],
name='z2pval')
z2pval.inputs.environ = {'FSLOUTPUTTYPE': 'NIFTI_GZ'}
z2pval.inputs.ignore_exception = False
z2pval.inputs.op_string = '-ztop'
z2pval.inputs.output_type = 'NIFTI_GZ'
z2pval.inputs.suffix = '_pval'
z2pval.inputs.terminal_output = 'stream'
frstlvl_wf.connect(merge_contrasts, ('out', pop_lambda),
z2pval, 'in_file')
- Create an outputspec node using IdentityInterface() to receive information from estimate_model, merge_contrasts, z2pval, generate_model, and estimate_model:
modelfit_outputspec = Node(IdentityInterface(fields = ['copes', 'varcopes',
'dof_file', 'pfiles',
'parameter_estimates',
'zstats',
'design_image',
'design_file',
'design_cov',
'sigmasquareds'],
mandatory_inputs = True),
name = 'modelfit_outputspec')
frstlvl_wf.connect(estimate_model, 'copes',
modelfit_outputspec, 'copes')
frstlvl_wf.connect(estimate_model, 'varcopes',
modelfit_outputspec, 'varcopes')
frstlvl_wf.connect(merge_contrasts, 'out',
modelfit_outputspec, 'zstats')
frstlvl_wf.connect(z2pval, 'out_file',
modelfit_outputspec, 'pfiles')
frstlvl_wf.connect(generate_model, 'design_image',
modelfit_outputspec, 'design_image')
frstlvl_wf.connect(generate_model, 'design_file',
modelfit_outputspec, 'design_file')
frstlvl_wf.connect(generate_model, 'design_cov',
modelfit_outputspec, 'design_cov')
frstlvl_wf.connect(estimate_model, 'param_estimates',
modelfit_outputspec, 'parameter_estimates')
frstlvl_wf.connect(estimate_model, 'dof_file',
modelfit_outputspec, 'dof_file')
frstlvl_wf.connect(estimate_model, 'sigmasquareds',
modelfit_outputspec, 'sigmasquareds')
- Finally, you'll create a Datasink() Mapnode to sink your data to specific locations:
sinkd = MapNode(DataSink(),
iterfield = ['substitutions', 'modelfit.contrasts.@copes',
'modelfit.contrasts.@varcopes',
'modelfit.estimates', 'modelfit.contrasts.@zstats'],
name = 'sinkd')
sinkd.inputs.base_directory = sink_directory
sinkd.inputs.container = subject_id
frstlvl_wf.connect(getsubs, 'subs',
sinkd, 'substitutions')
frstlvl_wf.connect(modelfit_outputspec, 'parameter_estimates',
sinkd, 'modelfit.estimates')
frstlvl_wf.connect(modelfit_outputspec, 'sigmasquareds',
sinkd, 'modelfit.estimates.@sigsq')
frstlvl_wf.connect(modelfit_outputspec, 'dof_file',
sinkd, 'modelfit.dofs')
frstlvl_wf.connect(modelfit_outputspec, 'copes',
sinkd, 'modelfit.contrasts.@copes')
frstlvl_wf.connect(modelfit_outputspec, 'varcopes',
sinkd, 'modelfit.contrasts.@varcopes')
frstlvl_wf.connect(modelfit_outputspec, 'zstats',
sinkd, 'modelfit.contrasts.@zstats')
frstlvl_wf.connect(modelfit_outputspec, 'design_image',
sinkd, 'modelfit.design')
frstlvl_wf.connect(modelfit_outputspec, 'design_cov',
sinkd, 'modelfit.design.@cov')
frstlvl_wf.connect(modelfit_outputspec, 'design_file',
sinkd, 'modelfit.design.@matrix')
frstlvl_wf.connect(modelfit_outputspec, 'pfiles',
sinkd, 'modelfit.contrasts.@pstats')
return frstlvl_wf
- Above you will notice
return frstlvl_wf
-- this is the closure of the pipeline function.
The following code uses the above function, as well as information from the SUBMIT script to execute the pipeline.
- You may wish to change the
crashdump_dir
def create_frstlvl_workflow(args, name = 'wmaze_MR_frstlvl'):
kwargs = dict(subject_id = args.subject_id,
sink_directory = os.path.abspath(args.out_dir),
name = name)
frstlvl_workflow = firstlevel_wf(**kwargs)
return frstlvl_workflow
if __name__ == "__main__":
from argparse import ArgumentParser
parser = ArgumentParser(description = __doc__)
parser.add_argument("-s", "--subject_id", dest = "subject_id",
help = "Current subject id", required = True)
parser.add_argument("-o", "--output_dir", dest = "out_dir",
help = "Output directory base")
parser.add_argument("-w", "--work_dir", dest = "work_dir",
help = "Working directory base")
args = parser.parse_args()
wf = create_frstlvl_workflow(args)
if args.work_dir:
work_dir = os.path.abspath(args.work_dir)
else:
work_dir = os.getcwd()
wf.config['execution']['crashdump_dir'] = '/scratch/madlab/crash/'
wf.base_dir = work_dir + '/' + args.subject_id
wf.run(plugin = 'LSF', plugin_args = {'bsub_args': '-q PQ_madlab'})
- First, make sure you see
#!/usr/bin/env python
as the first line of code in the script. - Next you'll need to
import os
- Create an array for the subject IDs:
subjs = ['WMAZE_001', 'WMAZE_002', 'WMAZE_004', 'WMAZE_005', 'WMAZE_006',
'WMAZE_007', 'WMAZE_008', 'WMAZE_009', 'WMAZE_010', 'WMAZE_012',
'WMAZE_017', 'WMAZE_018', 'WMAZE_019', 'WMAZE_020', 'WMAZE_021',
'WMAZE_022', 'WMAZE_023', 'WMAZE_024', 'WMAZE_026', 'WMAZE_027']
- Define your work and output directories:
workdir = '/home/data/madlab/scripts/wmaze/fixed_before_conditional/model3/status/lvl1'
outdir = '/home/data/madlab/data/mri/wmaze/frstlvl/fixed_before_conditional/model3_drop3'
- Moving DICOMs to HPC
- DICOM Conversion
- Freesurfer Recon_All, Quality Assurance, and Resubmission
- Preprocessing
- Normalization To Be Completed
- Creation of EV Files
- First Level Analysis
- Second Level Analysis To Be Completed
- Group Level Analysis To Be Completed
- DWI To Be Completed