# Source: bsed/transform_annotations.py (maxcrous/bsed, master branch)
"""
This file contains code for converting JSON files containing audio event
annotations into tab-separated txt annotation files, which are easier to parse
for both humans and Python.

Use as follows:

    $python3 transform_annotations.py --input_directory ~/dir_with_jsons \
                                      --output_directory ~/output_dir_for_txts

"""

import json
from glob import glob
import os
import argparse


def annotation_json_to_txt(annotation_json_path, output_path, desired_bird_class=1):
    """ Takes a json sound annotation file generated by dynitag and produces
        a txt annotation file that is easier to parse for training.

        Every annotation of every sufficiently annotated audio file (as decided
        by `enough_calls`) is written as one tab-separated line holding the
        audio file's rel_path, the annotation's start_time, its end_time and
        the species name of its tag. Audio files that are not sufficiently
        annotated are skipped entirely.

        Args:
            annotation_json_path: Path of the json annotation file to read.
            output_path: Path of the txt file to write. The file is created,
                or truncated if it already exists.
            desired_bird_class: Annotation tag id that must occur at least once
                among an audio file's annotations for that file to be exported.
                Defaults to 1, the value that used to be hard-coded.

        Raises:
            KeyError: If an expected key is missing from the json data, or an
                annotation refers to a tag id that has no species entry.
    """

    # JSON interchange files are UTF-8 encoded; do not rely on the platform's
    # default text encoding when reading them.
    with open(annotation_json_path, encoding='utf-8') as input_file:
        json_data = json.load(input_file)

    # Tag ids are taken to be the 1-based position of each tag in
    # 'annotation_tags'.
    # NOTE(review): presumably this matches the ids dynitag stores in
    # 'annotationtag_id' - confirm against an actual export.
    id_to_species = {
        tag_id: tag['name'].strip()
        for tag_id, tag in enumerate(json_data['annotation_tags'], start=1)
    }

    with open(output_path, 'w') as output_file:
        for audio_metadata in json_data['audios']:
            file_name = audio_metadata['rel_path']
            annotations = audio_metadata['annotations']

            # Only export audio files containing the desired bird class.
            if not enough_calls(annotations, desired_bird_class):
                continue

            for annotation in annotations:
                start_time = annotation['start_time']
                end_time = annotation['end_time']
                species = id_to_species[annotation['annotationtag_id']]
                output_file.write('{}\t{}\t{}\t{}\n'.format(file_name, start_time, end_time, species))


def enough_calls(annotations, bird_class):
    """ Counts the annotations whose 'annotationtag_id' equals `bird_class`.

        The returned count doubles as a truth value: it is non-zero (truthy)
        exactly when the audio file has at least one annotation of the given
        bird class, which is what makes a file sufficiently annotated.
    """

    return sum(1 for entry in annotations
               if entry['annotationtag_id'] == bird_class)


def main():
    """ Takes a directory and transforms all json sound annotation files to txt annotation files.

        Reads --input_directory and --output_directory from the command line
        (both default to './'). Every '*.json' file directly inside the input
        directory is converted to a '.txt' file with the same base name in the
        output directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_directory', help='The directory containing the json files.', default='./')
    parser.add_argument('--output_directory', help='The directory where output txt files are saved.', default='./')
    args = parser.parse_args()

    # Create the output directory up front so writing the first txt file does
    # not fail when the directory does not exist yet. An empty path means the
    # current directory, which needs no creation.
    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    # Note: this is a glob pattern, not a regular expression.
    json_pattern = os.path.join(args.input_directory, '*.json')

    for json_path in glob(json_pattern):
        basename = os.path.basename(json_path)
        basename_wo_extension, _ = os.path.splitext(basename)
        output_path = os.path.join(args.output_directory, basename_wo_extension + '.txt')

        annotation_json_to_txt(json_path, output_path)


if __name__ == '__main__':
    main()