joefutrelle · joefutrelle · Jul 2, 2025 · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
diff --git a/ifcb/data/export.py b/ifcb/data/export.py
@@ -0,0 +1,58 @@
+from zipfile import ZipFile
+
+from PIL import Image
+import pandas as pd
+from io import BytesIO
+
+import ifcb
+from ifcb.data.adc import SCHEMA_VERSION_1
+from ifcb.data.stitching import InfilledImages
+
+
+def to_ecotaxa(b, zip_path=None):
+    """Export the images of bin ``b`` as a zip archive for EcoTaxa.
+
+    The archive holds one PNG per image plus ``ecotaxa_metadata.tsv``: a
+    row of column names, a row of type codes (``[t]`` for the three
+    identifier columns, ``[f]`` for date and time), then one row per image.
+
+    :param b: bin exposing ``lid``, ``schema``, ``images``, ``timestamp``
+    :param zip_path: output path; defaults to ``<b.lid>.zip``
+    """
+    # Column name -> type code, fixed up front so both header rows stay
+    # well-formed even when the bin yields no images.
+    column_types = {
+        'sample_id': '[t]',
+        'object_id': '[t]',
+        'img_file_name': '[t]',
+        'object_date': '[f]',
+        'object_time': '[f]',
+    }
+    if zip_path is None:
+        zip_path = f'{b.lid}.zip'
+    # Schema version 1 bins are read through InfilledImages.
+    images = InfilledImages(b) if b.schema == SCHEMA_VERSION_1 else b.images
+    # Loop invariants: every image shares the bin's pid and timestamp.
+    pid = ifcb.Pid(b.lid)
+    object_date = b.timestamp.strftime('%Y%m%d')
+    object_time = b.timestamp.strftime('%H%M%S')
+    records = []
+
+    with ZipFile(zip_path, 'w') as fout:
+        for roi_number, image_data in images.items():
+            object_id = pid.with_target(roi_number)
+            img_file_name = f'{object_id}.png'
+            # Values follow the key order of column_types.
+            row = (b.lid, object_id, img_file_name, object_date, object_time)
+            records.append(row)
+            # Encode in memory; writestr() needs the finished PNG bytes.
+            png = BytesIO()
+            Image.fromarray(image_data).save(png, format='PNG')
+            fout.writestr(img_file_name, png.getvalue())
+
+        buffer = BytesIO()
+        buffer.write('\t'.join(column_types).encode() + b'\n')
+        buffer.write('\t'.join(column_types.values()).encode() + b'\n')
+        df = pd.DataFrame(records, columns=list(column_types))
+        df.to_csv(buffer, sep='\t', index=False, header=False)
+        fout.writestr('ecotaxa_metadata.tsv', buffer.getvalue())