Skip to content

Commit 8fecbf3

Browse files
authored
Update ProcessMultipleExcelTables_FromAivia.py
v1.60
1 parent 5e14795 commit 8fecbf3

File tree

1 file changed

+70
-27
lines changed

1 file changed

+70
-27
lines changed

Recipes/ProcessMeasurementTables/ProcessMultipleExcelTables_FromAivia.py

Lines changed: 70 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def search_activation_path():
1212
return ''
1313

1414
activate_path = search_activation_path()
15+
1516
if os.path.exists(activate_path):
1617
exec(open(activate_path).read(), {'__file__': activate_path})
1718
print(f'Aivia virtual environment activated\nUsing python: {activate_path}')
@@ -33,7 +34,7 @@ def search_activation_path():
3334
from datetime import datetime
3435

3536
# Folder to quickly run the script on all Excel files in it
36-
DEFAULT_FOLDER = r''
37+
DEFAULT_FOLDER = ''
3738

3839
# Collect scenario
3940
scenario_descriptions = ['A: Select multiple xlsx tables to create a combined table.\n'
@@ -64,16 +65,28 @@ def search_activation_path():
6465
'Not compatible with timelapses.'
6566
]
6667

68+
# Relationship definitions (Warning: names should match the sheet names in the spreadsheets)
69+
# Example: 'Set': ['Obj1', 'Obj2']
6770
relationships = {'Neuron Set': ['Soma Set', 'Dendrite Set', 'Dendrite Segments'],
68-
'Dendrite Set': ['Dendrite Segments']}
69-
relationship_ID_headers = {'Neuron Set': 'Neuron ID', 'Dendrite Set': 'Tree ID'}
71+
'Dendrite Set': ['Dendrite Segments'],
72+
'Cells': ['Cell Membranes', 'Cytoplasm', 'Nucleus', 'Vesicles - ']}
73+
74+
# Due to discrepancy between object name and ID header, we can provide the correspondence below
75+
relationship_ID_headers = {'Neuron Set': 'Neuron ID', 'Dendrite Set': 'Tree ID', 'Cells': 'Cell ID'}
7076

77+
# Measurements to extract, to avoid too many columns in the final table
7178
relationship_measurements = {'Soma Set': ['Volume (µm³)'],
7279
'Dendrite Set': ['Mean Diameter (µm)'],
73-
'Dendrite Segments': ['Mean Diameter (µm)', 'Total Path Length (µm)', 'Branch Angle']
74-
}
80+
'Dendrite Segments': ['Mean Diameter (µm)', 'Total Path Length (µm)', 'Branch Angle'],
81+
'Cell Membranes': [],
82+
'Cytoplasm': [],
83+
'Nucleus': [],
84+
'Vesicles - ': ['Relation Count']}
7585

76-
relationships_with_stats = ['Dendrite Set', 'Dendrite Segments']
86+
# Selection of secondary relationships for which statistics are calculated: 'Total', 'Average'.
87+
relationships_with_stats = ['Dendrite Set', 'Dendrite Segments', 'Vesicles - ']
88+
89+
# Some statistics do not make any sense, so below are the ones to avoid
7790
relationship_measurements_stats_todrop = {'Branch Angle': 'Total',
7891
'Mean Diameter (µm)': 'Total'}
7992

@@ -301,8 +314,8 @@ def run(params):
301314
if do_multiple_files_as_cols:
302315
output_basename = 'Analysis_All results.xlsx'
303316
else:
304-
output_basename = '{}_grouped.xlsx'.format(os.path.basename(indiv_path_list[0]).split('.')[0])
305-
output_file = os.path.join(output_folder, output_basename)
317+
output_basename = '{}_grouped.xlsx'.format(''.join(os.path.basename(indiv_path_list[0]).split('.')[:-1]))
318+
output_file = os.path.join(output_folder, output_basename.replace('.aivia', ''))
306319

307320
df_grouped = {} # init
308321

@@ -317,7 +330,7 @@ def run(params):
317330
do_combine_meas_tabs = False # not possible as columns = measurements
318331

319332
# Detect multiwell batch
320-
process_wells = is_multiwell(well_ref_for_tables[0])
333+
process_wells = is_multiwell(well_ref_for_tables[0]) # TODO: Remove summary tab if True?
321334

322335
# First table in final table
323336
df_grouped = df_raw_1
@@ -434,6 +447,12 @@ def run(params):
434447
if do_combine_meas_tabs:
435448
df_grouped = combine_tabs(df_grouped)
436449

450+
# Collecting the exact names of all measurements
451+
all_meas_names = []
452+
for tmp_df in df_grouped.values():
453+
if not 'summary' in str(tmp_df.columns[0]).lower():
454+
all_meas_names.extend(tmp_df.columns[1:])
455+
437456
# Specific to neurons: split dendrite trees from segments
438457
if 'Dendrite Set' in df_grouped.keys():
439458
df_grouped_to_add = {}
@@ -457,22 +476,32 @@ def run(params):
457476
else:
458477
df_grouped[n] = df_grouped_tmp[n]
459478

460-
# Process relationships between object sets (see definition before this code)
461-
for rel_k in relationships.keys():
462-
# Check presence of primary object
479+
# Process relationships between object sets (see definition before the def run)
480+
for rel_k in relationships.keys(): # E.g. 'Cells'
481+
# Select all tabs whose name contains the primary object, e.g. 'Cells.Cell_Cytoplasm Volume ...'
463482
for k in [it_k for it_k in df_grouped.keys() if rel_k in it_k]:
464-
k_suffix = k.replace(rel_k, '')
483+
k_suffix = k.replace(rel_k, '') # Important when multiple object sets existed (' (2)')
484+
485+
# Check presence of secondary object defined by relationships
465486
for rel_s in relationships[rel_k]:
466-
s = rel_s + k_suffix
467-
# Check presence of secondary object
468-
if s in df_grouped.keys():
469-
# Check presence of correct ID header in measurements in order to associate objects
470-
id_header = relationship_ID_headers[rel_k]
471-
if id_header in df_grouped[s].columns:
472-
prefix = s + '.'
473-
selected_meas = relationship_measurements[rel_s]
474-
df_grouped[k] = calculate_relation_stats(df_grouped[k], df_grouped[s], id_header,
475-
prefix, rel_s, selected_meas)
487+
# v1.60 gives the ability to provide only the beginning of the object name ('Vesicles - ')
488+
# It also provides relationships of multiple secondary objects beginning with the same name
489+
490+
for s in [it_s for it_s in df_grouped.keys() if is_same_object_set(it_s, k_suffix)]:
491+
492+
if rel_s in s: # positive match for secondary object
493+
# Check presence of correct ID header in measurements in order to associate objects
494+
id_header = relationship_ID_headers[rel_k]
495+
if id_header in df_grouped[s].columns:
496+
prefix = s + '.'
497+
selected_meas_prefixes = relationship_measurements[rel_s]
498+
499+
# Check whether each prefix occurs in the exact names of measurements (columns)
500+
for meas_prefix in selected_meas_prefixes:
501+
meas = [col for col in df_grouped[s].columns if meas_prefix in col]
502+
print('Collecting statistics ({}) from [{}] to be reported for [{}]'.format(meas, s, k))
503+
df_grouped[k] = calculate_relation_stats(df_grouped[k], df_grouped[s],
504+
id_header, prefix, rel_s, meas)
476505

477506
# Collecting summary values
478507
for k in df_grouped.keys():
@@ -567,7 +596,7 @@ def run(params):
567596
t += 1
568597

569598
# Adding percentages of objects if multiple object sets exist
570-
if do_combine_meas_tabs:
599+
if do_combine_meas_tabs: # TODO: for do_multiple_files_as_cols too??
571600
if len(total_counts) > 1:
572601
# Collect tab names without summary
573602
df_grouped_keys_nosum = [k for k in df_grouped.keys() if not k.endswith('Summary')]
@@ -596,7 +625,7 @@ def run(params):
596625
# Writing sheets to excel
597626
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
598627
# Write Summary first
599-
df_grouped[summary_lbl].to_excel(writer, sheet_name='Summary', index=False)
628+
df_grouped[summary_lbl].to_excel(writer, sheet_name=summary_lbl, index=False)
600629

601630
# Resizing columns
602631
for c in range(0, len(df_grouped[summary_lbl].columns)):
@@ -605,14 +634,14 @@ def run(params):
605634
len_longest_text = df_grouped[summary_lbl].iloc[:, c].map(str).str.len().max()
606635
writer.sheets[summary_lbl].column_dimensions[col_letter].width = len_longest_text * 1.5
607636

608-
for sh in [d for d in df_grouped.keys() if d != 'Summary']:
637+
for sh in [d for d in df_grouped.keys() if d != summary_lbl]:
609638
df_grouped[sh].to_excel(writer, sheet_name=sh, index=False)
610639

611640
# Resizing columns
612641
for c in range(0, len(df_grouped[sh].columns)):
613642
col_letter = openpyxl.utils.cell.get_column_letter(c + 1)
614643
len_longest_text = len(str(df_grouped[sh].columns[c]))
615-
if c == 0: # First column with measurement name and object names
644+
if c == 0 and df_grouped[sh].shape[0] > 1: # First column with measurement name and object names
616645
if len(str(df_grouped[sh].iloc[1, 0])) > len_longest_text:
617646
len_longest_text = len(str(df_grouped[sh].iloc[1, 0]))
618647
if len_longest_text < 10:
@@ -806,6 +835,8 @@ def combine_tabs(df_raw):
806835
def calculate_relation_stats(df_i, df_ii, id_header, meas_prefix, obj_ii_type, measurements):
807836
global relationships_with_stats, relationship_measurements_stats_todrop
808837

838+
# Measurements input needs to be a list
839+
809840
df_to_add = pd.DataFrame()
810841

811842
if set(df_ii.columns) & set(measurements):
@@ -870,6 +901,15 @@ def split_dendrite_set_and_segments(df):
870901
return dendrite_set_df, dendrite_seg_df
871902

872903

904+
# Function to distinguish 'Cells' from 'Cells (2)' and 'Cells (3)'. Used in relationship detection.
905+
def is_same_object_set(name, suffix):
    """Tell whether the tab ``name`` belongs to the object set designated by ``suffix``.

    :param name: name of a tab (sheet) to test, e.g. 'Nucleus' or 'Nucleus (2)'.
    :param suffix: suffix of the primary object set, e.g. ' (2)', or '' when the
        primary object set has no suffix.
    :return: True if ``name`` is considered part of the same object set, else False.
    """
    if suffix == '':
        # No suffix on the primary object: keep only names that do not end
        # with a closing parenthesis.
        # NOTE(review): any name ending with ')' is rejected here, not only
        # numbered ones such as 'Cells (2)' -- confirm this is intended.
        return not name.endswith(')')

    # Suffixed primary object: the secondary name must carry the same suffix.
    return name.endswith(suffix)
911+
912+
873913
def get_split_name(txt: str):
874914
# Check if previous object set name is present in txt
875915
# prev_obj_name = '' if it is for the first measurement tab or if there is only one object set with no child objects
@@ -978,3 +1018,6 @@ def Mbox(title, text, style):
9781018
# Also fixing a bug with formatting of 'Analysis_Summary' when not multiwell
9791019
# Fixed wrong sorting of subfolders such as 'Job 9', 'Job 10', etc.
9801020
# v1.55: - New virtual env code for auto-activation
1021+
# v1.56: - Bug fix since Aivia 12.0 (r38705) security release for scenario F where only the summary tab is output
1022+
# v1.60: - Add Cell Analysis support for relationship grouping. Better recognition of object sets with numbers '(1)'
1023+
# - Bug fixed at line 660 (if result table is empty)

0 commit comments

Comments
 (0)