We use cookies on this website to distinguish you from other users.
We use this data to improve our content experience and for targeted advertising.
By continuing to use this website you consent to our use of cookies.
For more information, please see our
Cookie Policy.
Source code for spafe.fbanks.mel_fbanks
"""
- Description : Mel filter banks implementation.
- Copyright (c) 2019-2024 Ayoub Malek.
This source code is licensed under the terms of the BSD 3-Clause License.
For a copy, see <https://github.com/SuperKogito/spafe/blob/master/LICENSE>.
"""
from typing import Optional
import numpy as np
from ..utils.converters import hz2mel, mel2hz, MelConversionApproach
from ..utils.exceptions import ParameterError, ErrorMsgs
from ..utils.filters import scale_fbank, ScaleType
def mel_filter_banks_helper(
    nfilts: int = 24,
    nfft: int = 512,
    fs: int = 16000,
    low_freq: float = 0,
    high_freq: Optional[float] = None,
    scale: ScaleType = "constant",
    fb_type="mel",
    conversion_approach: MelConversionApproach = "Oshaghnessy",
):
    """
    Compute triangular filter banks spaced evenly on the mel scale (or, for
    any other :code:`fb_type`, evenly in Hz). The filters are stored in the
    rows, the columns correspond to fft bins.

    Args:
        nfilts                (int) : the number of filters in the filter bank.
                                      (Default 24).
        nfft                  (int) : the FFT size.
                                      (Default is 512).
        fs                    (int) : sample rate/ sampling frequency of the signal.
                                      (Default 16000 Hz).
        low_freq            (float) : lowest band edge of mel filters.
                                      (Default 0 Hz).
        high_freq           (float) : highest band edge of mel filters.
                                      (Default samplerate/2).
        scale                 (str) : monotonicity behavior of the filter banks.
                                      (Default is "constant").
        fb_type               (str) : the filter banks type.
                                      (Default is "mel")
        conversion_approach   (str) : mel scale conversion approach.
                                      (Default is "Oshaghnessy").

    Returns:
        (tuple) :
            - (numpy.ndarray) : array of size nfilts * (nfft/2 + 1) containing filter bank. Each row holds 1 filter.
            - (numpy.ndarray) : array of center frequencies (in mel when :code:`fb_type` is "mel", in Hz otherwise).

    Raises:
        ParameterError : if :code:`low_freq` is negative or :code:`high_freq`
                         is above fs / 2.

    Tip:
        - :code:`scale` : can take the following options ["constant", "ascendant", "descendant"].
        - :code:`fb_type` : can take the following options ["mel", "lin"].
        - :code:`conversion_approach` : can take the following options ["Oshaghnessy", "Beranek", "Lindsay"].
          Note that the use of different options than the default can lead to unexpected behavior/issues.
    """
    # init freqs (a falsy high_freq, i.e. None or 0, falls back to Nyquist)
    high_freq = high_freq or fs / 2

    # run checks
    if low_freq < 0:
        raise ParameterError(ErrorMsgs["low_freq"])
    if high_freq > (fs / 2):
        raise ParameterError(ErrorMsgs["high_freq"])

    if fb_type == "mel":
        # convert bounding freqs
        low_mel = hz2mel(low_freq, conversion_approach)
        high_mel = hz2mel(high_freq, conversion_approach)

        # compute nfilts + 2 points evenly spaced in mels
        delta_mel = abs(high_mel - low_mel) / (nfilts + 1)
        scale_freqs = low_mel + delta_mel * np.arange(0, nfilts + 2)

        # each filter spans three consecutive points: lower edge, center, upper edge
        lower_edges_mel = scale_freqs[:-2]
        upper_edges_mel = scale_freqs[2:]
        center_freqs_mel = scale_freqs[1:-1]

        # convert the filter points back to Hz
        center_freqs_hz = mel2hz(center_freqs_mel, conversion_approach)
        lower_edges_hz = mel2hz(lower_edges_mel, conversion_approach)
        upper_edges_hz = mel2hz(upper_edges_mel, conversion_approach)

        # the returned center frequencies stay in mel
        center_freqs = center_freqs_mel
    else:
        # compute nfilts + 2 points evenly spaced in frequency (points are in Hz)
        delta_hz = abs(high_freq - low_freq) / (nfilts + 1)
        scale_freqs = low_freq + delta_hz * np.arange(0, nfilts + 2)

        # each filter spans three consecutive points: lower edge, center, upper edge
        lower_edges_hz = scale_freqs[:-2]
        upper_edges_hz = scale_freqs[2:]
        center_freqs_hz = scale_freqs[1:-1]
        center_freqs = center_freqs_hz

    # Frequencies of the fft bins. The columns of the filter bank are fft
    # bins, which cover 0 .. fs/2 regardless of the requested band edges, so
    # the grid must not be squeezed into [low_freq, high_freq].
    freqs = np.linspace(0, fs / 2, nfft // 2 + 1)
    fbank = np.zeros((nfilts, nfft // 2 + 1))

    for j, (center, lower, upper) in enumerate(
        zip(center_freqs_hz, lower_edges_hz, upper_edges_hz)
    ):
        # rising edge: 0 at the lower edge up to 1 at the center
        left_slope = (freqs >= lower) & (freqs <= center)
        fbank[j, left_slope] = (freqs[left_slope] - lower) / (center - lower)

        # falling edge: 1 at the center down to 0 at the upper edge
        right_slope = (freqs >= center) & (freqs <= upper)
        fbank[j, right_slope] = (upper - freqs[right_slope]) / (upper - center)

    # compute scaling
    # (presumably one weight per filter, broadcast over the bins -- TODO confirm in scale_fbank)
    scaling = scale_fbank(scale=scale, nfilts=nfilts)
    fbank = fbank * scaling
    return fbank, center_freqs
def mel_filter_banks(
    nfilts: int = 24,
    nfft: int = 512,
    fs: int = 16000,
    low_freq: float = 0,
    high_freq: Optional[float] = None,
    scale: ScaleType = "constant",
    conversion_approach: MelConversionApproach = "Oshaghnessy",
):
    """
    Compute Mel-filter banks. The filters are stored in the rows, the columns
    correspond to fft bins.

    This is a thin wrapper that forwards every argument to
    :code:`mel_filter_banks_helper` with :code:`fb_type="mel"`.

    Args:
        nfilts                (int) : the number of filters in the filter bank.
                                      (Default 24).
        nfft                  (int) : the FFT size.
                                      (Default is 512).
        fs                    (int) : sample rate/ sampling frequency of the signal.
                                      (Default 16000 Hz).
        low_freq            (float) : lowest band edge of mel filters.
                                      (Default 0 Hz).
        high_freq           (float) : highest band edge of mel filters.
                                      (Default samplerate/2).
        scale                 (str) : monotonicity behavior of the filter banks.
                                      (Default is "constant").
        conversion_approach   (str) : mel scale conversion approach.
                                      (Default is "Oshaghnessy").

    Returns:
        (tuple) :
            - (numpy.ndarray) : array of size nfilts * (nfft/2 + 1) containing filter bank. Each row holds 1 filter.
            - (numpy.ndarray) : array of center frequencies

    Tip:
        - :code:`scale` : can take the following options ["constant", "ascendant", "descendant"].
        - :code:`conversion_approach` : can take the following options ["Oshaghnessy", "Beranek", "Lindsay"].
          Note that the use of different options than the default can lead to unexpected behavior/issues.

    Examples:
        .. plot::

            import numpy as np
            from spafe.utils.converters import mel2hz
            from spafe.utils.vis import show_fbanks
            from spafe.fbanks.mel_fbanks import mel_filter_banks

            # init var
            fs = 8000
            nfilt = 7
            nfft = 1024
            low_freq = 0
            high_freq = fs / 2

            # compute freqs for xaxis
            mhz_freqs = np.linspace(low_freq, high_freq, nfft //2+1)

            for scale, label in [("constant", ""), ("ascendant", "Ascendant "), ("descendant", "Descendant ")]:
                # compute fbank
                mel_fbanks_mat, mel_freqs = mel_filter_banks(nfilts=nfilt,
                                                             nfft=nfft,
                                                             fs=fs,
                                                             low_freq=low_freq,
                                                             high_freq=high_freq,
                                                             scale=scale)

                # visualize fbank
                show_fbanks(
                    mel_fbanks_mat,
                    [mel2hz(freq) for freq in mel_freqs],
                    mhz_freqs,
                    label + "Mel Filter Bank",
                    ylabel="Weight",
                    x1label="Frequency / Hz",
                    x2label="Frequency / Mel",
                    figsize=(14, 5),
                    fb_type="mel")
    """
    # delegate to the shared helper, pinning the filter bank type to "mel"
    filter_matrix, center_mels = mel_filter_banks_helper(
        fb_type="mel",
        nfilts=nfilts,
        nfft=nfft,
        fs=fs,
        low_freq=low_freq,
        high_freq=high_freq,
        scale=scale,
        conversion_approach=conversion_approach,
    )
    return filter_matrix, center_mels
def inverse_mel_filter_banks(
    nfilts: int = 24,
    nfft: int = 512,
    fs: int = 16000,
    low_freq: float = 0,
    high_freq: Optional[float] = None,
    scale: ScaleType = "constant",
    conversion_approach: MelConversionApproach = "Oshaghnessy",
):
    """
    Compute inverse Mel-filter banks. The filters are stored in the rows, the columns
    correspond to fft bins.

    The inverse bank is obtained by building a regular mel filter bank (with
    the opposite :code:`scale` direction) and reversing every filter along the
    frequency axis.

    Args:
        nfilts                (int) : the number of filters in the filter bank.
                                      (Default 24).
        nfft                  (int) : the FFT size.
                                      (Default is 512).
        fs                    (int) : sample rate/ sampling frequency of the signal.
                                      (Default 16000 Hz).
        low_freq            (float) : lowest band edge of mel filters.
                                      (Default 0 Hz).
        high_freq           (float) : highest band edge of mel filters.
                                      (Default samplerate/2).
        scale                 (str) : monotonicity behavior of the filter banks.
                                      (Default is "constant").
        conversion_approach   (str) : mel scale conversion approach.
                                      (Default is "Oshaghnessy").

    Returns:
        (tuple) :
            - (numpy.ndarray) : array of size nfilts * (nfft/2 + 1) containing filter bank. Each row holds 1 filter.
            - (list) : center frequencies of the reversed filters, converted with :code:`hz2mel`.

    Raises:
        KeyError : if :code:`scale` is not one of "constant", "ascendant", "descendant".

    Tip:
        - :code:`scale` : can take the following options ["constant", "ascendant", "descendant"].
        - :code:`conversion_approach` : can take the following options ["Oshaghnessy", "Beranek", "Lindsay"].
          Note that the use of different options than the default can lead to unexpected behavior/issues.

    Examples:
        .. plot::

            import numpy as np
            from spafe.utils.converters import mel2hz
            from spafe.utils.vis import show_fbanks
            from spafe.fbanks.mel_fbanks import inverse_mel_filter_banks

            # init var
            fs = 8000
            nfilt = 7
            nfft = 1024
            low_freq = 0
            high_freq = fs / 2

            # compute freqs for xaxis
            mhz_freqs = np.linspace(low_freq, high_freq, nfft //2+1)

            for scale, label in [("constant", ""), ("ascendant", "Ascendant "), ("descendant", "Descendant ")]:
                # compute fbank
                imel_fbanks_mat, imel_freqs = inverse_mel_filter_banks(nfilts=nfilt,
                                                                       nfft=nfft,
                                                                       fs=fs,
                                                                       low_freq=low_freq,
                                                                       high_freq=high_freq,
                                                                       scale=scale)

                # visualize fbank
                show_fbanks(
                    imel_fbanks_mat,
                    [mel2hz(freq) for freq in imel_freqs],
                    mhz_freqs,
                    label + "Inverse Mel Filter Bank",
                    ylabel="Weight",
                    x1label="Frequency / Hz",
                    x2label="Frequency / Mel",
                    figsize=(14, 5),
                    fb_type="mel")

    See Also:
        - :py:func:`spafe.fbanks.gammatone_fbanks.gammatone_filter_banks`
        - :py:func:`spafe.fbanks.linear_fbanks.linear_filter_banks`
        - :py:func:`spafe.fbanks.bark_fbanks.bark_filter_banks`
    """
    # Resolve the default upper band edge here: it is used below to mirror the
    # center frequencies, and subtracting an array from None raises TypeError.
    high_freq = high_freq or fs / 2

    # inverse scaler value: reversing the filters also reverses their order,
    # so the opposite scale direction is requested from the regular bank
    scales = {
        "ascendant": "descendant",
        "descendant": "ascendant",
        "constant": "constant",
    }

    # generate inverse mel fbanks by inversing regular mel fbanks
    imel_fbanks, mel_freqs = mel_filter_banks_helper(
        nfilts=nfilts,
        nfft=nfft,
        fs=fs,
        low_freq=low_freq,
        high_freq=high_freq,
        scale=scales[scale],
        fb_type="mel",
        conversion_approach=conversion_approach,
    )

    # inverse regular filter banks: reverse every row along the frequency axis
    imel_fbanks = imel_fbanks[:, ::-1]

    # get inverse freqs: mirror the centers in Hz, then convert back to mel
    # NOTE(review): `high_freq - center` matches the reversed filters for the
    # default band (low_freq=0, high_freq=fs/2) -- TODO confirm for other bands.
    imel_center_freqs = [
        hz2mel(freq, conversion_approach)
        for freq in (high_freq - mel2hz(mel_freqs, conversion_approach))
    ]
    return np.abs(imel_fbanks), imel_center_freqs