We use cookies on this website to distinguish you from other users.
We use this data to improve our content experience and for targeted advertising.
By continuing to use this website you consent to our use of cookies.
For more information, please see our
Cookie Policy.
Source code for pydiogment.auga
"""
- Description: amplitude-based augmentation techniques/manipulations for audio data.
"""
import os
import numpy as np
from .utils.io import read_file, write_file
def apply_gain(infile, gain):
    """
    Apply gain to infile.

    The samples returned by ``read_file`` are multiplied by the linear
    amplitude factor equivalent to ``gain``, hard-clipped to [-1, 1], rescaled
    by their mean absolute value and handed to ``write_file`` together with
    the directory of ``infile`` and a gain-specific name suffix.

    Args:
        infile (str) : input filename/path.
        gain (float) : gain in dB (both positive and negative).
    """
    # read input file
    fs, x = read_file(filename=infile)

    # apply gain: a dB value maps to an *amplitude* ratio through
    # 10**(dB / 20); 10**(dB / 10) is the power ratio and would apply twice
    # the requested gain when multiplied onto the samples.
    # The multiplication already yields a new array, so no explicit copy is
    # needed to protect the data returned by read_file.
    x = x * (10**(gain / 20.0))

    # hard clip (assumes samples are normalized to [-1, 1] -- TODO confirm
    # against read_file)
    x = np.minimum(np.maximum(-1.0, x), 1.0)

    # rescale by the mean absolute amplitude; skip for an all-zero signal to
    # avoid a division by zero that would fill the output with NaNs
    mean_abs = np.mean(np.abs(x))
    if mean_abs > 0:
        x = x / mean_abs

    # export data to file
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_with_%s_gain.wav" % str(gain)
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=x,
               fs=fs)
def add_noise(infile, snr):
    """
    Augment data using noise injection.

    Note:
        It simply adds Gaussian random values to the input file data, scaled
        so that the ratio of signal power to noise power matches the snr.

    Args:
        infile (str) : input filename/path.
        snr    (int) : signal to noise ratio in dB.
    """
    # read input file
    fs, sig = read_file(filename=infile)

    # draw noise with the same shape as the signal so that multi-channel
    # (2-D) data is supported as well; identical to randn(len(sig)) for 1-D
    noise = np.random.randn(*np.shape(sig))

    # compute powers (in float64 so that integer samples cannot overflow
    # when squared)
    noise_power = np.mean(np.power(noise, 2))
    sig_power = np.mean(np.power(np.asarray(sig, dtype=np.float64), 2))

    # compute snr and scaling factor; snr is a power ratio, hence dB / 10
    snr_linear = 10**(snr / 10.0)
    noise_factor = (sig_power / noise_power) * (1 / snr_linear)

    # add noise (sqrt turns the power factor into an amplitude factor)
    y = sig + np.sqrt(noise_factor) * noise

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_%s_noisy.wav" % snr

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=y,
               fs=fs)
def fade_in_and_out(infile):
    """
    Add a fade in and out effect to the audio file.

    The signal is multiplied by a Hamming window spanning its whole length,
    rescaled by its mean absolute value and handed to ``write_file``.

    Args:
        infile (str) : input filename/path.
    """
    # read input file
    fs, sig = read_file(filename=infile)

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_fade_in_out.wav"

    # fade in and out: one window over the full signal length. The window is
    # given trailing singleton axes so it also broadcasts over multi-channel
    # data (assumes axis 0 is time, as len(sig) already implies).
    window = np.hamming(len(sig))
    window = window.reshape((-1,) + (1,) * (np.ndim(sig) - 1))
    augmented_sig = window * sig

    # rescale by the mean absolute amplitude; skip for an all-zero signal to
    # avoid a division by zero that would fill the output with NaNs
    mean_abs = np.mean(np.abs(augmented_sig))
    if mean_abs > 0:
        augmented_sig = augmented_sig / mean_abs

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=augmented_sig,
               fs=fs)
def normalize(infile, normalization_technique="peak", rms_level=0):
    """
    Normalize the signal given a certain technique (peak or rms).

    Args:
        infile                  (str) : input filename/path.
        normalization_technique (str) : type of normalization technique to use:
                                        "peak" or "rms". (default is peak)
        rms_level               (int) : rms level in dB, only used by the
                                        "rms" technique. (default is 0)

    Raises:
        ValueError: if normalization_technique is neither "peak" nor "rms".
    """
    # validate first so that an unsupported technique fails with a clear
    # error instead of an unbound-variable crash after the file was read
    if normalization_technique not in ("peak", "rms"):
        raise ValueError("ParameterError: Unknown normalization_technique variable.")

    # read input file
    fs, sig = read_file(filename=infile)

    # normalize signal
    if normalization_technique == "peak":
        # the peak is the largest magnitude, not the largest signed value:
        # a dominant negative peak must not leave the output above 1 and a
        # non-positive maximum must not flip or blow up the signal
        peak = np.max(np.abs(sig))
        # a silent signal has no peak to normalize to; leave it unchanged
        y = sig / peak if peak > 0 else sig
    else:
        # linear rms level and scaling factor
        # NOTE(review): r is used as a target RMS *amplitude*, for which the
        # usual dB conversion is 10**(dB / 20); the existing dB / 10 mapping
        # is kept to preserve current output -- confirm the intended scale.
        r = 10**(rms_level / 10.0)

        # energy in float64 so that integer samples cannot overflow; np.size
        # counts every sample, matching the sum over all elements
        energy = np.sum(np.asarray(sig, dtype=np.float64)**2)

        # a silent signal cannot be scaled to a target level; leave unchanged
        a = np.sqrt((np.size(sig) * r**2) / energy) if energy > 0 else 1.0

        # normalize
        y = sig * a

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_{}_normalized.wav".format(normalization_technique)

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=y,
               fs=fs)