# SDI-ResNet/VISION_mel.py (GitHub repository ckorgial/SDI-ResNet, branch main)

import os
import glob
import librosa
import librosa.display
import numpy as np

def process_audio(wav_file_path: str, output_directory: str, *,
                  sr=22050, n_fft: int = 2048, hop_length: int = 512) -> str:
    """Convert one audio file to a dB-scaled Mel spectrogram saved as ``.npy``.

    The result is written to ``<output_directory>/<input stem>_mel.npy``.
    The output directory is created when it does not exist yet.

    Args:
        wav_file_path: Path of the audio file to load.
        output_directory: Directory that receives the ``.npy`` file.
        sr: Target sampling rate handed to ``librosa.load``. The default of
            22050 is librosa's own default, i.e. what the call used before
            this parameter existed. Pass ``None`` to keep the file's native
            sampling rate.
        n_fft: FFT window length used for the spectrogram.
        hop_length: Number of samples between successive frames.

    Returns:
        The path of the written ``.npy`` file.
    """
    # NOTE(review): with the default ``sr`` the audio is resampled to
    # 22050 Hz. The dataset directory names used by this script mention
    # 44100 -- confirm whether ``sr=None`` (native rate) is what is wanted.
    y, sample_rate = librosa.load(wav_file_path, sr=sr)

    # Generate the Mel power spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(
        y=y, sr=sample_rate, n_fft=n_fft, hop_length=hop_length
    )

    # Convert power to decibels relative to the spectrogram's maximum (log scale)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Derive "<stem>_mel.npy" from the input file name
    stem = os.path.splitext(os.path.basename(wav_file_path))[0]
    output_path = os.path.join(output_directory, stem + '_mel.npy')

    # An empty directory string means "current directory"; makedirs('') would raise
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    np.save(output_path, mel_spectrogram_db)
    return output_path

def process_device_folder(base_directory: str, output_base_directory: str,
                          recording_types=None) -> None:
    """Convert every WAV file below *base_directory* into a Mel ``.npy`` file.

    The expected layout is ``<base_directory>/<device>/<recording_type>/*.wav``.
    For each device directory the same ``<device>/<recording_type>`` tree is
    created below *output_base_directory*, and every WAV file found is passed
    to ``process_audio`` together with its matching output directory.

    Args:
        base_directory: Directory that contains one sub-directory per device.
        output_base_directory: Root directory for the generated ``.npy`` files.
        recording_types: Iterable of sub-folder names to visit inside each
            device directory. ``None`` selects the nine default names
            (flat/indoor/outdoor, each plain, ``WA`` and ``YT``).
    """
    if recording_types is None:
        recording_types = (
            'flat', 'flatWA', 'flatYT',
            'indoor', 'indoorWA', 'indoorYT',
            'outdoor', 'outdoorWA', 'outdoorYT',
        )

    # sorted() gives a reproducible processing order; glob's order is arbitrary
    for device_folder in sorted(glob.glob(os.path.join(base_directory, '*'))):
        # Plain files next to the device folders (README, .DS_Store, ...) are
        # not devices; without this check they would get output directories.
        if not os.path.isdir(device_folder):
            continue

        device_name = os.path.basename(device_folder)
        for recording_type in recording_types:
            # Source sub-folder and the output folder that mirrors it
            subfolder_path = os.path.join(device_folder, recording_type)
            output_subfolder_path = os.path.join(output_base_directory, device_name, recording_type)

            # Create the output directory if it doesn't exist
            os.makedirs(output_subfolder_path, exist_ok=True)

            # Process each WAV file in the sub-folder (none if it is missing)
            for wav_file in sorted(glob.glob(os.path.join(subfolder_path, '*.wav'))):
                print(f"Processing file: {wav_file}")
                process_audio(wav_file, output_subfolder_path)

# Set the base directory where the device folders are located
base_directory = './Device_Identification_Fusion/video2audio/VISION_video2audio_44100/'
# Set the base output directory where the .npy files will be saved
output_base_directory = './XAI_VISION/VISION_mel_all_44100/'

# Process audio for each device folder. Guarded so that the conversion only
# starts when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    process_device_folder(base_directory, output_base_directory)