From 70033a6b29f05d8e43ee39041ac8bf1d2ac72d56 Mon Sep 17 00:00:00 2001 From: Joe Futrelle Date: Tue, 1 Jul 2025 14:18:46 -0400 Subject: [PATCH 1/5] export to ecotaxa format --- ifcb/data/export.py | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 ifcb/data/export.py diff --git a/ifcb/data/export.py b/ifcb/data/export.py new file mode 100644 index 0000000..153a744 --- /dev/null +++ b/ifcb/data/export.py @@ -0,0 +1,53 @@ +from zipfile import ZipFile + +from PIL import Image +import pandas as pd +from io import BytesIO + +import ifcb + + +def to_ecotaxa(b, zip_path=None): + + if zip_path is None: + zip_path = f'{b.lid}.zip' + + with ZipFile(zip_path, 'w') as fout: + records = [] + + for roi_number, image_data in b.images.items(): + + object_id = ifcb.Pid(b.lid).with_target(roi_number) + img_file_name = f'{object_id}.png' + + record = { + 'sample_id': b.lid, + 'object_id': object_id, + 'img_file_name': img_file_name, + 'object_date': b.timestamp.strftime('%Y%m%d'), + 'object_time': b.timestamp.strftime('%H%M%S'), + } + + records.append(record) + + image = Image.fromarray(image_data) + + # Convert the image to bytes + img_byte_arr = BytesIO() + image.save(img_byte_arr, format='PNG') + img_byte_arr = img_byte_arr.getvalue() + + # Write the bytes to the zip file + fout.writestr(img_file_name, img_byte_arr) + + df = pd.DataFrame(records) + buffer = BytesIO() + tsv_filename = f'ecotaxa_metadata.tsv' + df = pd.DataFrame(records) + buffer = BytesIO() + buffer.write('\t'.join(df.columns).encode() + b'\n') + buffer.write('\t'.join('[t]' for _ in range(3)).encode() + b'\t') + buffer.write('\t'.join('[f]' for _ in range(len(df.columns) - 3)).encode() + b'\n') + df.to_csv(buffer, sep='\t', index=False, header=None) + + fout.writestr('ecotaxa_metadata.tsv', buffer.getvalue()) From 46b881564febbf0165ea5587fbfa1911132d9684 Mon Sep 17 00:00:00 2001 From: Joe Futrelle Date: Tue, 1 Jul 2025 14:56:29 -0400 Subject: [PATCH 2/5] remove duplicate DataFrame creation Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ifcb/data/export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ifcb/data/export.py b/ifcb/data/export.py index 153a744..43e2a70 100644 --- a/ifcb/data/export.py +++ b/ifcb/data/export.py @@ -40,7 +40,6 @@ def to_ecotaxa(b, zip_path=None): # Write the bytes to the zip file fout.writestr(img_file_name, img_byte_arr) - df = pd.DataFrame(records) buffer = BytesIO() tsv_filename = f'ecotaxa_metadata.tsv' df = pd.DataFrame(records) From 5c6586c1330e6b96e8ed184182efb55965414d09 Mon Sep 17 00:00:00 2001 From: Joe Futrelle Date: Tue, 1 Jul 2025 14:57:40 -0400 Subject: [PATCH 3/5] remove duplicate buffer creation Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ifcb/data/export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ifcb/data/export.py b/ifcb/data/export.py index 43e2a70..e1b7da9 100644 --- a/ifcb/data/export.py +++ b/ifcb/data/export.py @@ -40,7 +40,6 @@ def to_ecotaxa(b, zip_path=None): # Write the bytes to the zip file fout.writestr(img_file_name, img_byte_arr) - buffer = BytesIO() tsv_filename = f'ecotaxa_metadata.tsv' df = pd.DataFrame(records) buffer = BytesIO() From 4dd7c72fab2733827d1cf6465387bb93f4bec847 Mon Sep 17 00:00:00 2001 From: Joe Futrelle Date: Wed, 2 Jul 2025 06:42:34 -0400 Subject: [PATCH 4/5] specify tsv filename once --- ifcb/data/export.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ifcb/data/export.py b/ifcb/data/export.py index e1b7da9..cf69078 100644 --- a/ifcb/data/export.py +++ b/ifcb/data/export.py @@ -40,7 +40,7 @@ def to_ecotaxa(b, zip_path=None): # Write the bytes to the zip file fout.writestr(img_file_name, img_byte_arr) - tsv_filename = f'ecotaxa_metadata.tsv' + tsv_filename = 'ecotaxa_metadata.tsv' df = pd.DataFrame(records) buffer = BytesIO() buffer.write('\t'.join(df.columns).encode() + b'\n') @@ -48,4 +48,4 @@ def to_ecotaxa(b, zip_path=None): buffer.write('\t'.join('[f]' for _ in range(len(df.columns) - 3)).encode() + b'\n') df.to_csv(buffer, sep='\t', index=False, header=None) - fout.writestr('ecotaxa_metadata.tsv', buffer.getvalue()) + fout.writestr(tsv_filename, buffer.getvalue()) From a47ad3f98425d0fdb8c99a49b1d924704bb8d9b0 Mon Sep 17 00:00:00 2001 From: Joe Futrelle Date: Wed, 2 Jul 2025 11:40:04 -0400 Subject: [PATCH 5/5] adding stitching --- ifcb/data/export.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ifcb/data/export.py b/ifcb/data/export.py index cf69078..be2ae07 100644 --- a/ifcb/data/export.py +++ b/ifcb/data/export.py @@ -5,6 +5,8 @@ from io import BytesIO import ifcb +from ifcb.data.adc import SCHEMA_VERSION_1 +from ifcb.data.stitching import InfilledImages def to_ecotaxa(b, zip_path=None): @@ -15,7 +17,12 @@ def to_ecotaxa(b, zip_path=None): with ZipFile(zip_path, 'w') as fout: records = [] - for roi_number, image_data in b.images.items(): + if b.schema == SCHEMA_VERSION_1: + images = InfilledImages(b) + else: + images = b.images + + for roi_number, image_data in images.items(): object_id = ifcb.Pid(b.lid).with_target(roi_number) img_file_name = f'{object_id}.png'