From 8fecbf3c599c3357219ba47235e4511bc09c501d Mon Sep 17 00:00:00 2001 From: Patrice Mascalchi <18047969+pmascalchi@users.noreply.github.com> Date: Fri, 11 Aug 2023 16:39:17 +0200 Subject: [PATCH] Update ProcessMultipleExcelTables_FromAivia.py v1.60 --- .../ProcessMultipleExcelTables_FromAivia.py | 97 +++++++++++++------ 1 file changed, 70 insertions(+), 27 deletions(-) diff --git a/Recipes/ProcessMeasurementTables/ProcessMultipleExcelTables_FromAivia.py b/Recipes/ProcessMeasurementTables/ProcessMultipleExcelTables_FromAivia.py index c39ef8f..fe9bcaa 100644 --- a/Recipes/ProcessMeasurementTables/ProcessMultipleExcelTables_FromAivia.py +++ b/Recipes/ProcessMeasurementTables/ProcessMultipleExcelTables_FromAivia.py @@ -12,6 +12,7 @@ def search_activation_path(): return '' activate_path = search_activation_path() + if os.path.exists(activate_path): exec(open(activate_path).read(), {'__file__': activate_path}) print(f'Aivia virtual environment activated\nUsing python: {activate_path}') @@ -33,7 +34,7 @@ def search_activation_path(): from datetime import datetime # Folder to quickly run the script on all Excel files in it -DEFAULT_FOLDER = r'' +DEFAULT_FOLDER = '' # Collect scenario scenario_descriptions = ['A: Select multiple xlsx tables to create a combined table.\n' @@ -64,16 +65,28 @@ def search_activation_path(): 'Not compatible with timelapses.' ] +# Relationship definitions (Warning: names should be the sheets in the spreadsheets) +# Example: 'Set': ['Obj1', 'Obj2'] relationships = {'Neuron Set': ['Soma Set', 'Dendrite Set', 'Dendrite Segments'], - 'Dendrite Set': ['Dendrite Segments']} -relationship_ID_headers = {'Neuron Set': 'Neuron ID', 'Dendrite Set': 'Tree ID'} + 'Dendrite Set': ['Dendrite Segments'], + 'Cells': ['Cell Membranes', 'Cytoplasm', 'Nucleus', 'Vesicles - ']} + +# Due to discrepancy between object name and ID header, we can provide the correspondence below +relationship_ID_headers = {'Neuron Set': 'Neuron ID', 'Dendrite Set': 'Tree ID', 'Cells': 'Cell ID'} +# Measurements to extract, to avoid too many columns in the final table relationship_measurements = {'Soma Set': ['Volume (µm³)'], 'Dendrite Set': ['Mean Diameter (µm)'], - 'Dendrite Segments': ['Mean Diameter (µm)', 'Total Path Length (µm)', 'Branch Angle'] - } + 'Dendrite Segments': ['Mean Diameter (µm)', 'Total Path Length (µm)', 'Branch Angle'], + 'Cell Membranes': [], + 'Cytoplasm': [], + 'Nucleus': [], + 'Vesicles - ': ['Relation Count']} -relationships_with_stats = ['Dendrite Set', 'Dendrite Segments'] +# Selection of secondary relationships for which statistics are calculated: 'Total', 'Average'. +relationships_with_stats = ['Dendrite Set', 'Dendrite Segments', 'Vesicles - '] + +# Some statistics do not make any sense, so below are the ones to avoid relationship_measurements_stats_todrop = {'Branch Angle': 'Total', 'Mean Diameter (µm)': 'Total'} @@ -301,8 +314,8 @@ def run(params): if do_multiple_files_as_cols: output_basename = 'Analysis_All results.xlsx' else: - output_basename = '{}_grouped.xlsx'.format(os.path.basename(indiv_path_list[0]).split('.')[0]) - output_file = os.path.join(output_folder, output_basename) + output_basename = '{}_grouped.xlsx'.format(''.join(os.path.basename(indiv_path_list[0]).split('.')[:-1])) + output_file = os.path.join(output_folder, output_basename.replace('.aivia', '')) df_grouped = {} # init @@ -317,7 +330,7 @@ def run(params): do_combine_meas_tabs = False # not possible as columns = measurements # Detect multiwell batch - process_wells = is_multiwell(well_ref_for_tables[0]) + process_wells = is_multiwell(well_ref_for_tables[0]) # TODO: Remove summary tab if True? # First table in final table df_grouped = df_raw_1 @@ -434,6 +447,12 @@ def run(params): if do_combine_meas_tabs: df_grouped = combine_tabs(df_grouped) + # Collecting all measurements exact names + all_meas_names = [] + for tmp_df in df_grouped.values(): + if not 'summary' in str(tmp_df.columns[0]).lower(): + all_meas_names.extend(tmp_df.columns[1:]) + # Specific to neurons: split dendrite trees from segments if 'Dendrite Set' in df_grouped.keys(): df_grouped_to_add = {} @@ -457,22 +476,32 @@ def run(params): else: df_grouped[n] = df_grouped_tmp[n] - # Process relationships between object sets (see definition before this code) - for rel_k in relationships.keys(): - # Check presence of primary object + # Process relationships between object sets (see definition before the def run) + for rel_k in relationships.keys(): # E.g. 'Cells' + # Select all tabs where the primary object is # E.g. 'Cells.Cell_Cytoplasm Volume ...' for k in [it_k for it_k in df_grouped.keys() if rel_k in it_k]: - k_suffix = k.replace(rel_k, '') + k_suffix = k.replace(rel_k, '') # Important when multiple object sets existed (' (2)') + + # Check presence of secondary object defined by relationships for rel_s in relationships[rel_k]: - s = rel_s + k_suffix - # Check presence of secondary object - if s in df_grouped.keys(): - # Check presence of correct ID header in measurements in order to associate objects - id_header = relationship_ID_headers[rel_k] - if id_header in df_grouped[s].columns: - prefix = s + '.' - selected_meas = relationship_measurements[rel_s] - df_grouped[k] = calculate_relation_stats(df_grouped[k], df_grouped[s], id_header, - prefix, rel_s, selected_meas) + # v1.60 gives the ability to provide only the beginning of the object name ('Vesicles - ') + # It also provides relationships of multiple secondary objects beginning with the same name + + for s in [it_s for it_s in df_grouped.keys() if is_same_object_set(it_s, k_suffix)]: + + if rel_s in s: # positive match for secondary object + # Check presence of correct ID header in measurements in order to associate objects + id_header = relationship_ID_headers[rel_k] + if id_header in df_grouped[s].columns: + prefix = s + '.' + selected_meas_prefixes = relationship_measurements[rel_s] + + # See if prefixes exists in exact names of measurements (columns) + for meas_prefix in selected_meas_prefixes: + meas = [col for col in df_grouped[s].columns if meas_prefix in col] + print('Collecting statistics ({}) from [{}] to be reported for [{}]'.format(meas, s, k)) + df_grouped[k] = calculate_relation_stats(df_grouped[k], df_grouped[s], + id_header, prefix, rel_s, meas) # Collecting summary values for k in df_grouped.keys(): @@ -567,7 +596,7 @@ def run(params): t += 1 # Adding percentages of objects if multiple object sets exists - if do_combine_meas_tabs: + if do_combine_meas_tabs: # TODO: for do_multiple_files_as_cols too?? if len(total_counts) > 1: # Collect tab names without summary df_grouped_keys_nosum = [k for k in df_grouped.keys() if not k.endswith('Summary')] @@ -596,7 +625,7 @@ def run(params): # Writing sheets to excel with pd.ExcelWriter(output_file, engine="openpyxl") as writer: # Write Summary first - df_grouped[summary_lbl].to_excel(writer, sheet_name='Summary', index=False) + df_grouped[summary_lbl].to_excel(writer, sheet_name=summary_lbl, index=False) # Resizing columns for c in range(0, len(df_grouped[summary_lbl].columns)): @@ -605,14 +634,14 @@ def run(params): len_longest_text = df_grouped[summary_lbl].iloc[:, c].map(str).str.len().max() writer.sheets[summary_lbl].column_dimensions[col_letter].width = len_longest_text * 1.5 - for sh in [d for d in df_grouped.keys() if d != 'Summary']: + for sh in [d for d in df_grouped.keys() if d != summary_lbl]: df_grouped[sh].to_excel(writer, sheet_name=sh, index=False) # Resizing columns for c in range(0, len(df_grouped[sh].columns)): col_letter = openpyxl.utils.cell.get_column_letter(c + 1) len_longest_text = len(str(df_grouped[sh].columns[c])) - if c == 0: # First column with measurement name and object names + if c == 0 and df_grouped[sh].shape[0] > 1: # First column with measurement name and object names if len(str(df_grouped[sh].iloc[1, 0])) > len_longest_text: len_longest_text = len(str(df_grouped[sh].iloc[1, 0])) if len_longest_text < 10: @@ -806,6 +835,8 @@ def combine_tabs(df_raw): def calculate_relation_stats(df_i, df_ii, id_header, meas_prefix, obj_ii_type, measurements): global relationships_with_stats, relationship_measurements_stats_todrop + # Measurements input needs to be a list + df_to_add = pd.DataFrame() if set(df_ii.columns) & set(measurements): @@ -870,6 +901,15 @@ def split_dendrite_set_and_segments(df): return dendrite_set_df, dendrite_seg_df +# Function to distinguish 'Cells' from 'Cells (2)' and 'Cells (3)'. Used in relationship detection. +def is_same_object_set(name, suffix): + if suffix == '': + ans = True if not name.endswith(')') else False + else: + ans = True if name.endswith(suffix) else False + return ans + + def get_split_name(txt: str): # Check if previous object set name is present in txt # prev_obj_name = '' if it is for the first measurement tab or if there is only one object set with no child objects @@ -978,3 +1018,6 @@ def Mbox(title, text, style): # Also fixing a bug with formatting of 'Analysis_Summary' when not multiwell # Fixed wrong sorting of subfolders such as 'Job 9', 'Job 10', etc. # v1.55: - New virtual env code for auto-activation +# v1.56: - Bug fix since Aivia 12.0 (r38705) security release for scenario F where only the summary tab is output +# v1.60: - Add Cell Analysis support for relationship grouping. Better recognition of object sets with numbers '(1)' +# - Bug fixed at line 660 (if result table is empty)