-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtransform_annotations.py
More file actions
75 lines (55 loc) · 2.76 KB
/
transform_annotations.py
File metadata and controls
75 lines (55 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
This file contains code for converting a JSON file containing audio event
annotations into a tab-separated txt annotation file, which is easier to parse for both humans and Python.
Use as follows:
$python3 transform_annotations.py --input_directory ~/dir_with_jsons \
--output_directory ~/output_dir_for_txts
"""
import json
from glob import glob
import os
import argparse
def annotation_json_to_txt(annotation_json_path, output_path, desired_bird_class=1):
    """Convert a dynitag json sound annotation file into a txt annotation file.

    One tab-separated line ``rel_path, start_time, end_time, species`` is
    written per annotation. Audio files for which ``enough_calls`` reports no
    annotation tagged with ``desired_bird_class`` are skipped entirely.

    Args:
        annotation_json_path: Path of the json annotation file to read.
        output_path: Path of the txt file to write (truncated if it exists).
        desired_bird_class: Tag id an audio file must be annotated with at
            least once for its annotations to be exported. Defaults to 1,
            the value that was previously hard-coded in this function.

    Raises:
        KeyError: If an expected key is missing from the json data, or an
            annotation refers to a tag id that has no entry in
            'annotation_tags'.
    """
    # JSON text is UTF-8, so do not depend on the platform's locale encoding.
    with open(annotation_json_path, encoding='utf-8') as input_file:
        json_data = json.load(input_file)

    # Tag ids are taken to be the 1-based position within 'annotation_tags'.
    id_to_species = {
        tag_id: tag['name'].strip()
        for tag_id, tag in enumerate(json_data['annotation_tags'], start=1)
    }

    # Species names may be non-ASCII; write them as UTF-8 as well.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        for audio_metadata in json_data['audios']:
            file_name = audio_metadata['rel_path']
            annotations = audio_metadata['annotations']
            if not enough_calls(annotations, desired_bird_class):
                # Not sufficiently annotated: export nothing for this audio file.
                continue
            for annotation in annotations:
                species = id_to_species[annotation['annotationtag_id']]
                output_file.write('{}\t{}\t{}\t{}\n'.format(
                    file_name,
                    annotation['start_time'],
                    annotation['end_time'],
                    species,
                ))
def enough_calls(annotations, bird_class):
    """ Counts the annotations of an audio file that carry the given tag id.

    The result is the number of entries in ``annotations`` whose
    'annotationtag_id' equals ``bird_class``. It can be used as a truth
    value: it is non-zero exactly when the file has at least one
    annotation of that bird species.
    """
    matching_entries = [
        entry for entry in annotations
        if entry['annotationtag_id'] == bird_class
    ]
    return len(matching_entries)
if __name__ == '__main__':
    # Takes a directory and transforms all json sound annotation files in it
    # to txt annotation files with the same base name.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_directory', help='The directory containing the json files.', default='./')
    parser.add_argument('--output_directory', help='The directory where output txt files are saved.', default='./')
    args = parser.parse_args()

    # Opening a file for writing does not create missing parent directories,
    # so make sure the output directory exists before converting anything.
    # An empty string means the current directory and needs no creation.
    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    # This is a glob pattern (not a regular expression) matching the inputs.
    json_pattern = os.path.join(args.input_directory, '*.json')

    # glob() returns paths in arbitrary order; sort for reproducible runs.
    for json_path in sorted(glob(json_pattern)):
        stem, _extension = os.path.splitext(os.path.basename(json_path))
        output_path = os.path.join(args.output_directory, stem + '.txt')
        annotation_json_to_txt(json_path, output_path)