from zipfile import ZipFile

from PIL import Image
import pandas as pd
from io import BytesIO

import ifcb
from ifcb.data.adc import SCHEMA_VERSION_1
from ifcb.data.stitching import InfilledImages


# Name of the metadata table stored inside the archive.
_TSV_FILENAME = 'ecotaxa_metadata.tsv'

# Metadata columns in output order, each paired with the marker written on
# the second TSV line. NOTE(review): '[t]' / '[f]' are presumably EcoTaxa's
# text / numeric column markers -- confirm against the EcoTaxa import docs.
_ECOTAXA_COLUMNS = (
    ('sample_id', '[t]'),
    ('object_id', '[t]'),
    ('img_file_name', '[t]'),
    ('object_date', '[f]'),
    ('object_time', '[f]'),
)


def to_ecotaxa(b, zip_path=None):
    """Write the images of a bin, plus a metadata TSV, to a zip archive.

    Every image of the bin is stored in the archive as ``<object_id>.png``.
    A tab-separated table named ``ecotaxa_metadata.tsv`` is added last; it
    consists of a column-name line, a line of per-column type markers, and
    one row per image.

    :param b: the bin to export; must provide ``lid``, ``schema``,
        ``timestamp`` and (unless the schema is ``SCHEMA_VERSION_1``)
        ``images``
    :param zip_path: path of the zip file to create; defaults to
        ``'<b.lid>.zip'`` (relative to the current working directory)
    :returns: None
    """
    if zip_path is None:
        zip_path = f'{b.lid}.zip'

    column_names = [name for name, _ in _ECOTAXA_COLUMNS]
    type_markers = [marker for _, marker in _ECOTAXA_COLUMNS]

    with ZipFile(zip_path, 'w') as fout:
        # Schema-version-1 bins are read through InfilledImages rather than
        # through the bin's own image mapping.
        if b.schema == SCHEMA_VERSION_1:
            images = InfilledImages(b)
        else:
            images = b.images

        # These values are the same for every ROI, so compute them once.
        bin_pid = ifcb.Pid(b.lid)
        timestamp = b.timestamp
        object_date = timestamp.strftime('%Y%m%d')
        object_time = timestamp.strftime('%H%M%S')

        records = []
        for roi_number, image_data in images.items():
            object_id = bin_pid.with_target(roi_number)
            img_file_name = f'{object_id}.png'

            records.append({
                'sample_id': b.lid,
                'object_id': object_id,
                'img_file_name': img_file_name,
                'object_date': object_date,
                'object_time': object_time,
            })

            # Encode the image as PNG in memory and store it in the archive.
            png_buffer = BytesIO()
            Image.fromarray(image_data).save(png_buffer, format='PNG')
            fout.writestr(img_file_name, png_buffer.getvalue())

        # Passing the columns explicitly keeps the header, the type row and
        # the data rows aligned even when the bin contains no images (an
        # empty record list would otherwise yield a frame with no columns).
        df = pd.DataFrame(records, columns=column_names)

        buffer = BytesIO()
        buffer.write('\t'.join(column_names).encode() + b'\n')
        buffer.write('\t'.join(type_markers).encode() + b'\n')
        # The two header lines are written by hand above, so pandas must
        # not emit its own header.
        df.to_csv(buffer, sep='\t', index=False, header=False)

        fout.writestr(_TSV_FILENAME, buffer.getvalue())