We use cookies on this website to distinguish you from other users.
We use this data to improve our content experience and for targeted advertising.
By continuing to use this website you consent to our use of cookies.
For more information, please see our Cookie Policy.
Source code for pydiogment.augt
"""
- Description: time-based augmentation techniques/manipulations for audio data.
"""
import os
import math
import random
import warnings
import subprocess
import numpy as np
from .utils.io import read_file, write_file
def _ffprobe_duration(path):
    """
    Return the duration line(s) ffprobe reports for a media file.

    Runs ``ffprobe -show_format -v quiet`` without a shell and keeps only the
    output lines containing ``duration=``, with the first occurrence of that
    marker removed and a newline appended to each. The result is an empty
    string when ffprobe prints no such line.

    Args:
        path (str) : Path of the file to probe.

    Returns:
        str: the filtered ffprobe output described above.
    """
    probe = subprocess.Popen(["ffprobe", "-i", path, "-show_format", "-v", "quiet"],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    report, _ = probe.communicate()
    return "".join(line.replace("duration=", "", 1) + "\n"
                   for line in report.decode("utf-8", "replace").split("\n")
                   if "duration=" in line)


def eliminate_silence(infile):
    """
    Eliminate silence from voice file using ffmpeg library.

    The processed audio is written to the input path with a trailing ".wav"
    replaced by "_augmented_without_silence.wav" (the suffix is simply appended
    when the path does not end in ".wav").

    Args:
        infile (str) : Path to get the original voice file from.

    Returns:
        tuple of two str: the duration reported by ffprobe for the original
        file and for the silence-free file, in that order. Each value keeps
        the trailing newline and is an empty string when ffprobe reports no
        duration (e.g. the file could not be read).
    """
    # strip only a trailing ".wav" so that a ".wav" appearing elsewhere in the
    # path (directory names, double extensions) does not truncate it
    stem = infile[:-len(".wav")] if infile.endswith(".wav") else infile
    output_path = stem + "_augmented_without_silence.wav"

    # filter silence in wav
    remove_silence_command = ["ffmpeg", "-i", infile,
                              "-af",
                              "silenceremove=stop_periods=-1:stop_duration=0.25:stop_threshold=-36dB",
                              "-acodec", "pcm_s16le",
                              "-ac", "1", output_path]
    process = subprocess.Popen(remove_silence_command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains the pipes while waiting; wait() alone can block
    # forever once the child fills the stderr pipe buffer
    process.communicate()
    if process.returncode != 0:
        warnings.warn("ffmpeg exited with status {0} while writing {1}".format(
            process.returncode, output_path))

    # paths are passed as list arguments (no shell), so quotes or spaces in
    # file names cannot break or alter the command
    with_silence_duration = _ffprobe_duration(infile)
    no_silence_duration = _ffprobe_duration(output_path)
    return with_silence_duration, no_silence_duration
def random_cropping(infile, min_len=1):
    """
    Crop the infile with an input minimum duration.

    A random excerpt of at least min_len seconds is cut from the signal and
    exported through write_file with the name attribute
    "_augmented_randomly_cropped_<min_len>.wav". When the signal is not longer
    than min_len, nothing is written and a warning is issued instead.

    Args:
        infile  (str)   : Input filename.
        min_len (float) : Minimum duration for randomly cropped excerpt
    """
    fs, x = read_file(filename=infile)

    # the crop below slices along the first axis, so the duration must be the
    # length of that axis; x.size would also count the samples of any extra
    # channels (assumes axis 0 is time -- TODO confirm against read_file)
    t_end = x.shape[0] / fs

    if not (t_end > min_len):
        warning_msg = "\nmin_len provided is greater than the duration of the song.\n"
        warnings.warn(warning_msg)
        return

    # get start and end time
    start = random.uniform(0.0, t_end - min_len)
    end = random.uniform(start + min_len, t_end)

    # crop data
    first_sample = int(math.floor(start * fs))
    last_sample = int(math.ceil(end * fs))
    y = x[first_sample:last_sample]

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_randomly_cropped_%s.wav" % str(min_len)

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=y,
               fs=fs)
def slow_down(input_file, coefficient=0.8):
    """
    Slow or stretch a wave.

    Runs ffmpeg with the "atempo" audio filter and writes the result to the
    input path with a trailing ".wav" replaced by "_augmented_slowed.wav".
    The ffmpeg command and its captured output are printed.

    Args:
        input_file  (str)   : Input filename.
        coefficient (float) : coefficient characterising the slowing degree,
                              passed as the value of the atempo filter.
    """
    # set-up variables for paths and file names
    name_attribute = "_augmented_slowed.wav"
    # strip only a trailing ".wav" so that a ".wav" appearing elsewhere in the
    # path does not truncate it
    stem = input_file[:-len(".wav")] if input_file.endswith(".wav") else input_file
    output_file = stem + name_attribute

    # apply slowing command
    slowing_command = ["ffmpeg", "-i", input_file, "-filter:a",
                       "atempo={0}".format(str(coefficient)),
                       output_file]
    print(" ".join(slowing_command))
    p = subprocess.Popen(slowing_command,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    output, error = p.communicate()

    # "replace" keeps the report printable even if ffmpeg echoes metadata that
    # is not valid UTF-8
    print(output, error.decode("utf-8", "replace"))
    print("Writing data to " + output_file + ".")
def speed(input_file, coefficient=1.25):
    """
    Speed or shrink a wave.

    Runs ffmpeg with the "atempo" audio filter and writes the result to the
    input path with a trailing ".wav" replaced by "_augmented_speeded.wav".
    The call blocks until ffmpeg has finished.

    Args:
        input_file  (str)   : Input filename.
        coefficient (float) : coefficient characterising the speeding degree,
                              passed as the value of the atempo filter.
    """
    # set-up variables for paths and file names
    name_attribute = "_augmented_speeded.wav"
    # strip only a trailing ".wav" so that a ".wav" appearing elsewhere in the
    # path does not truncate it
    stem = input_file[:-len(".wav")] if input_file.endswith(".wav") else input_file
    output_file = stem + name_attribute

    # apply speeding command
    speeding_command = ["ffmpeg", "-i", input_file, "-filter:a",
                        "atempo={0}".format(str(coefficient)),
                        output_file]
    process = subprocess.Popen(speeding_command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # wait for ffmpeg and drain its pipes: without this the function returned
    # before the file was written and the child could stall on a full pipe
    process.communicate()
    if process.returncode != 0:
        warnings.warn("ffmpeg exited with status {0} while writing {1}".format(
            process.returncode, output_file))

    print("Writing data to " + output_file + ".")
def shift_time(infile, tshift, direction):
    """
    Augment audio data by shifting the time in the file. Signal can be shifted
    to the left or right.

    Note:
        The shift is circular: it is performed with numpy.roll, so the samples
        pushed past one end of the signal re-enter at the other end and nothing
        is replaced by silence.
        With direction "left" the signal is rolled by +int(tshift * fs) samples
        (content moves towards higher sample indices); with "right" it is
        rolled by the same amount in the opposite sense. Any other value leaves
        the signal unshifted.

    Args:
        infile    (str) : Input filename.
        tshift    (int) : Signal time shift in seconds.
        direction (str) : shift direction (to the left or right).
    """
    fs, sig = read_file(filename=infile)

    # number of samples to roll by, signed according to the direction
    n_samples = int(tshift * fs)
    shift = n_samples * int(direction == "left") - \
        n_samples * int(direction == "right")

    # shift time along the first axis only: without an explicit axis numpy
    # rolls the flattened array, which would mix the channels of a 2-D signal
    # (assumes axis 0 is time -- TODO confirm against read_file)
    augmented_sig = np.roll(sig, shift, axis=0)

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_%s_%s_shifted.wav" % (direction, tshift)

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=augmented_sig,
               fs=fs)
def reverse(infile):
    """
    Write a copy of the input signal that plays from the end to the beginning.

    The reversed signal is exported through write_file with the name attribute
    "_augmented_reversed.wav", next to the input file.

    Args:
        infile (str): Input filename.
    """
    sample_rate, samples = read_file(filename=infile)

    # destination directory and suffix of the exported file
    target_dir = os.path.dirname(infile)
    suffix = "_augmented_reversed.wav"

    # a negative-step slice walks the signal backwards along its first axis
    backwards = samples[::-1]

    write_file(output_file_path=target_dir,
               input_file_name=infile,
               name_attribute=suffix,
               sig=backwards,
               fs=sample_rate)
def resample_audio(infile, sr):
    """
    Resample the signal to a new input sampling rate using ffmpeg.

    The result is written to the input path with a trailing ".wav" replaced by
    "_augmented_resampled_to_<sr>.wav". The ffmpeg command is printed and the
    call blocks until ffmpeg has finished.

    Args:
        infile (str) : input filename/path.
        sr     (int) : new sampling rate.
    """
    # set-up variables for paths and file names; strip only a trailing ".wav"
    # so that a ".wav" appearing elsewhere in the path does not truncate it
    stem = infile[:-len(".wav")] if infile.endswith(".wav") else infile
    output_file = "{0}_augmented_resampled_to_{1}.wav".format(stem, sr)

    # apply resampling command
    sampling_command = ["ffmpeg", "-i", infile, "-ar", str(sr), output_file]
    print(" ".join(sampling_command))
    process = subprocess.Popen(sampling_command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # wait for ffmpeg and drain its pipes: without this the function returned
    # before the file was written and the child could stall on a full pipe;
    # stdin is a pipe closed by communicate() so ffmpeg never waits on a prompt
    process.communicate()
    if process.returncode != 0:
        warnings.warn("ffmpeg exited with status {0} while writing {1}".format(
            process.returncode, output_file))