Python源码示例:torch.hann_window()
示例1
def _feature_window_function(window_type: str,
                             window_size: int,
                             blackman_coeff: float,
                             device: torch.device,
                             dtype: int,
                             ) -> Tensor:
    r"""Return a 1-D window tensor of ``window_size`` samples of the given type.

    Args:
        window_type: One of the module-level constants HANNING, HAMMING,
            POVEY, RECTANGULAR or BLACKMAN.
        window_size: Number of samples in the window.
        blackman_coeff: Center coefficient of the generalized Blackman window
            (only used when ``window_type == BLACKMAN``).
        device: Device on which to allocate the window.
        dtype: Torch dtype of the result (annotated ``int`` presumably for
            TorchScript compatibility; passed straight through to the factory
            calls).

    Raises:
        ValueError: If ``window_type`` is not one of the supported constants.
    """
    if window_type == HANNING:
        return torch.hann_window(window_size, periodic=False, device=device, dtype=dtype)
    elif window_type == HAMMING:
        # Kaldi-style Hamming coefficients.
        return torch.hamming_window(window_size, periodic=False, alpha=0.54, beta=0.46, device=device, dtype=dtype)
    elif window_type == POVEY:
        # like hanning but goes to zero at edges
        return torch.hann_window(window_size, periodic=False, device=device, dtype=dtype).pow(0.85)
    elif window_type == RECTANGULAR:
        return torch.ones(window_size, device=device, dtype=dtype)
    elif window_type == BLACKMAN:
        a = 2 * math.pi / (window_size - 1)
        window_function = torch.arange(window_size, device=device, dtype=dtype)
        # can't use torch.blackman_window as they use different coefficients
        return (blackman_coeff - 0.5 * torch.cos(a * window_function) +
                (0.5 - blackman_coeff) * torch.cos(2 * a * window_function)).to(device=device, dtype=dtype)
    else:
        # ValueError is the precise exception for a bad argument value and is
        # still caught by any caller that previously caught Exception.
        raise ValueError('Invalid window type ' + window_type)
示例2
def __init__(self,
             n_fft: int = 400,
             win_length: Optional[int] = None,
             hop_length: Optional[int] = None,
             pad: int = 0,
             window_fn: Callable[..., Tensor] = torch.hann_window,
             power: Optional[float] = 2.,
             normalized: bool = False,
             wkwargs: Optional[dict] = None) -> None:
    """Store STFT parameters and register the analysis window as a buffer.

    Args:
        n_fft: FFT size; with onesided STFT this yields ``n_fft // 2 + 1`` bins.
        win_length: window length; defaults to ``n_fft``.
        hop_length: hop between frames; defaults to ``win_length // 2``.
        pad: padding amount, stored on the module.
        window_fn: factory used to build the window tensor.
        power: exponent applied to the spectrogram magnitudes, stored.
        normalized: whether the STFT is normalized, stored.
        wkwargs: extra keyword arguments forwarded to ``window_fn``.
    """
    super(Spectrogram, self).__init__()
    self.pad = pad
    self.power = power
    self.normalized = normalized
    # number of FFT bins. the returned STFT result will have n_fft // 2 + 1
    # number of frequencies due to onesided=True in torch.stft
    self.n_fft = n_fft
    if win_length is None:
        win_length = n_fft
    self.win_length = win_length
    self.hop_length = self.win_length // 2 if hop_length is None else hop_length
    # Build the window once; a buffer moves with the module across devices
    # but is never trained.
    extra_kwargs = {} if wkwargs is None else wkwargs
    self.register_buffer('window', window_fn(self.win_length, **extra_kwargs))
示例3
def test_griffinlim(self):
    """Griffin-Lim reconstruction should match librosa's implementation."""
    # NOTE: This test is flaky without a fixed random seed
    # See https://github.com/pytorch/audio/issues/382
    torch.random.manual_seed(42)
    tensor = torch.rand((1, 1000))
    # STFT / Griffin-Lim parameters shared by both implementations.
    n_fft = 400
    ws = 400  # window size
    hop = 100
    window = torch.hann_window(ws)
    normalize = False
    momentum = 0.99
    n_iter = 8
    length = 1000
    rand_init = False
    # librosa's equivalent of rand_init: 'random' or None (zero phase).
    init = 'random' if rand_init else None
    # Power spectrogram, then sqrt to get the magnitudes Griffin-Lim expects.
    specgram = F.spectrogram(tensor, 0, window, n_fft, hop, ws, 2, normalize).sqrt()
    ta_out = F.griffinlim(specgram, window, n_fft, hop, ws, 1, normalize,
                          n_iter, momentum, length, rand_init)
    lr_out = librosa.griffinlim(specgram.squeeze(0).numpy(), n_iter=n_iter, hop_length=hop,
                                momentum=momentum, init=init, length=length)
    lr_out = torch.from_numpy(lr_out).unsqueeze(0)
    self.assertEqual(ta_out, lr_out, atol=5e-5, rtol=1e-5)
示例4
def test_griffinlim(self):
    """TorchScript consistency check for F.griffinlim."""
    def func(tensor):
        # Fixed Griffin-Lim configuration; hop is half the window length.
        n_fft = 400
        ws = 400
        hop = 200
        window = torch.hann_window(ws, device=tensor.device, dtype=tensor.dtype)
        power = 2.
        normalize = False
        momentum = 0.99
        n_iter = 32
        length = 1000
        # NOTE(review): likely a typo for rand_init; harmless since it is
        # passed positionally below.
        rand_int = False
        return F.griffinlim(tensor, window, n_fft, hop, ws, power, normalize, n_iter, momentum, length, rand_int)
    # (1, 201, 6): 201 = 400 // 2 + 1 onesided frequency bins, 6 frames.
    tensor = torch.rand((1, 201, 6))
    self._assert_consistency(func, tensor)
示例5
def compute_torch_stft(audio, descriptor):
name, *args = descriptor.split("_")
n_fft, hop_size, *rest = args
n_fft = int(n_fft)
hop_size = int(hop_size)
stft = torch.stft(
audio,
n_fft=n_fft,
hop_length=hop_size,
window=torch.hann_window(n_fft, device=audio.device)
)
stft = torch.sqrt((stft ** 2).sum(-1))
return stft
示例6
def get_window(name, window_length, squared=False):
    """
    Return a torch windowing function by name.

    Arguments:
    ----------
    name (str) : 'hann', 'hamming' or 'blackman'
    window_length (int) : length of the window
    squared (bool) : if true, square the window elementwise
    Returns:
    ----------
    torch.FloatTensor : window of size `window_length`
    """
    # Dispatch table instead of an if/elif chain.
    factories = {
        "hann": torch.hann_window,
        "hamming": torch.hamming_window,
        "blackman": torch.blackman_window,
    }
    try:
        window = factories[name](window_length)
    except KeyError:
        raise ValueError("Invalid window name {}".format(name))
    return window * window if squared else window
示例7
def __init__(self, win_length=1024, hop_length=256, n_fft=2048, n_mels=80, preemp=True):
    """Set up optional pre-emphasis, the mel basis and a Hann window buffer."""
    super(MelSpectrogram, self).__init__()
    self.win_length = win_length
    self.hop_length = hop_length
    self.n_fft = n_fft
    if not preemp:
        self.preemp = None
    else:
        # Fixed (non-trainable) first-order pre-emphasis implemented as a
        # conv: y[t] = x[t] - 0.97 * x[t-1].
        conv = nn.Conv1d(1, 1, 2, bias=False, padding=1)
        conv.weight.data[0][0][0] = -0.97
        conv.weight.data[0][0][1] = 1.0
        conv.weight.requires_grad = False
        self.preemp = conv
    # Buffers move with the module across devices but are never trained.
    self.register_buffer('mel_basis', _build_mel_basis(n_fft, n_mels))
    self.register_buffer('win', torch.hann_window(win_length))
示例8
def __init__(self, win_length, hop_length):
    """Store framing parameters and the name -> window-factory lookup table.

    Args:
        win_length: analysis window length in samples.
        hop_length: hop (stride) between frames in samples.
    """
    super().__init__()
    self.win_length = win_length
    self.hop_length = hop_length
    # NOTE(review): reads self._opt_level, which is not set in this __init__;
    # presumably provided by a base class or mixin initialized earlier — verify.
    self.disable_casts = self._opt_level == Optimization.mxprO1
    # Both 'ones' and None map to torch.ones, i.e. a rectangular window.
    self.torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'ones': torch.ones,
        None: torch.ones,
    }
示例9
def __init__(self,
             n_fft: int = 400,
             n_iter: int = 32,
             win_length: Optional[int] = None,
             hop_length: Optional[int] = None,
             window_fn: Callable[..., Tensor] = torch.hann_window,
             power: float = 2.,
             normalized: bool = False,
             wkwargs: Optional[dict] = None,
             momentum: float = 0.99,
             length: Optional[int] = None,
             rand_init: bool = True) -> None:
    """Configure the Griffin-Lim phase-reconstruction transform.

    Args:
        n_fft: FFT size.
        n_iter: number of Griffin-Lim iterations.
        win_length: window length; defaults to ``n_fft``.
        hop_length: hop between frames; defaults to ``win_length // 2``.
        window_fn: factory that builds the analysis window.
        power: exponent of the input magnitude spectrogram, stored.
        normalized: whether the STFT is normalized, stored.
        wkwargs: extra keyword arguments for ``window_fn``.
        momentum: acceleration momentum, must lie in ``[0, 1)``.
        length: optional output length, stored.
        rand_init: random vs. zero phase initialization, stored.
    """
    super(GriffinLim, self).__init__()
    # Fix: accept momentum == 0 (plain, unaccelerated Griffin-Lim). The
    # original guard was `momentum > 0`, which contradicted its own "< 0"
    # error message and rejected the valid value 0.
    assert momentum < 1, 'momentum=%s > 1 can be unstable' % momentum
    assert momentum >= 0, 'momentum=%s < 0' % momentum
    self.n_fft = n_fft
    self.n_iter = n_iter
    self.win_length = win_length if win_length is not None else n_fft
    self.hop_length = hop_length if hop_length is not None else self.win_length // 2
    window = window_fn(self.win_length) if wkwargs is None else window_fn(self.win_length, **wkwargs)
    self.register_buffer('window', window)
    self.normalized = normalized
    self.length = length
    self.power = power
    # Rescale once here so the iteration uses momentum / (1 + momentum).
    self.momentum = momentum / (1 + momentum)
    self.rand_init = rand_init
示例10
def __init__(self,
             sample_rate: int = 16000,
             n_fft: int = 400,
             win_length: Optional[int] = None,
             hop_length: Optional[int] = None,
             f_min: float = 0.,
             f_max: Optional[float] = None,
             pad: int = 0,
             n_mels: int = 128,
             window_fn: Callable[..., Tensor] = torch.hann_window,
             power: Optional[float] = 2.,
             normalized: bool = False,
             wkwargs: Optional[dict] = None) -> None:
    """Compose a Spectrogram transform with a MelScale mapping."""
    super(MelSpectrogram, self).__init__()
    self.sample_rate = sample_rate
    self.n_fft = n_fft
    if win_length is None:
        win_length = n_fft
    self.win_length = win_length
    if hop_length is None:
        hop_length = self.win_length // 2
    self.hop_length = hop_length
    self.pad = pad
    self.power = power
    self.normalized = normalized
    # Number of mel frequency bins.
    self.n_mels = n_mels
    self.f_max = f_max
    self.f_min = f_min
    # Pipeline: STFT power spectrogram followed by a mel filterbank.
    self.spectrogram = Spectrogram(n_fft=self.n_fft,
                                   win_length=self.win_length,
                                   hop_length=self.hop_length,
                                   pad=self.pad,
                                   window_fn=window_fn,
                                   power=self.power,
                                   normalized=self.normalized,
                                   wkwargs=wkwargs)
    # onesided STFT yields n_fft // 2 + 1 frequency bins.
    self.mel_scale = MelScale(self.n_mels, self.sample_rate, self.f_min, self.f_max, self.n_fft // 2 + 1)
示例11
def test_griffinlim(self):
    """Batched and per-item F.griffinlim calls must produce the same result."""
    n_fft = 400
    ws = 400  # window size
    hop = 200
    window = torch.hann_window(ws)
    power = 2
    normalize = False
    momentum = 0.99
    n_iter = 32
    length = 1000
    # (1, 201, 6): 201 = 400 // 2 + 1 onesided bins, 6 frames of magnitudes.
    tensor = torch.rand((1, 201, 6))
    # The trailing 0 is presumably the rand_init flag (disabled) — see the
    # other griffinlim examples' positional argument order.
    self.assert_batch_consistencies(
        F.griffinlim, tensor, window, n_fft, hop, ws, power, normalize, n_iter, momentum, length, 0, atol=5e-5
    )
示例12
def test_istft_is_inverse_of_stft1(self):
    """Round-trip istft(stft(x)) for a centered, normalized, onesided
    Hann-window configuration."""
    # hann_window, centered, normalized, onesided
    stft_config = dict(
        n_fft=12,
        hop_length=4,
        win_length=12,
        window=torch.hann_window(12),
        center=True,
        pad_mode='reflect',
        normalized=True,
        onesided=True,
    )
    _test_istft_is_inverse_of_stft(stft_config)
示例13
def test_istft_is_inverse_of_stft2(self):
    """Round-trip istft(stft(x)) with a window shorter than n_fft, neither
    normalized nor onesided."""
    # hann_window, centered, not normalized, not onesided
    stft_config = dict(
        n_fft=12,
        hop_length=2,
        win_length=8,
        window=torch.hann_window(8),
        center=True,
        pad_mode='reflect',
        normalized=False,
        onesided=False,
    )
    _test_istft_is_inverse_of_stft(stft_config)
示例14
def test_linearity_of_istft1(self):
    """istft must be linear for a centered, normalized, onesided Hann config."""
    # hann_window, centered, normalized, onesided
    stft_config = dict(
        n_fft=12,
        window=torch.hann_window(12),
        center=True,
        pad_mode='reflect',
        normalized=True,
        onesided=True,
    )
    self._test_linearity_of_istft((2, 7, 7, 2), stft_config)
示例15
def test_linearity_of_istft2(self):
    """istft must be linear when neither normalized nor onesided."""
    # hann_window, centered, not normalized, not onesided
    stft_config = dict(
        n_fft=12,
        window=torch.hann_window(12),
        center=True,
        pad_mode='reflect',
        normalized=False,
        onesided=False,
    )
    self._test_linearity_of_istft((2, 12, 7, 2), stft_config)
示例16
def test_spectrogram(self):
    """TorchScript consistency check for F.spectrogram."""
    def func(tensor):
        # 400-sample Hann window, 50% overlap, power spectrogram, no padding.
        n_fft = 400
        ws = 400
        hop = 200
        pad = 0
        window = torch.hann_window(ws, device=tensor.device, dtype=tensor.dtype)
        power = 2.
        normalize = False
        return F.spectrogram(tensor, pad, window, n_fft, hop, ws, power, normalize)
    tensor = common_utils.get_whitenoise()
    self._assert_consistency(func, tensor)
示例17
def __init__(self, hp):
    """Cache a CUDA Hann window, a mel filterbank and an MSE criterion.

    Args:
        hp: hyper-parameter object; reads hp.audio.{win_length, sr, n_fft, n_mels}.
    """
    self.hp = hp
    # NOTE(review): hard-codes CUDA; this fails on CPU-only machines.
    self.window = torch.hann_window(window_length=hp.audio.win_length).cuda()
    self.mel_basis = librosa.filters.mel(
        sr=hp.audio.sr,
        n_fft=hp.audio.n_fft,
        n_mels=hp.audio.n_mels
    )
    self.mel_basis = torch.from_numpy(self.mel_basis).cuda()  # [n_mels, n_fft//2+1]
    self.criterion = torch.nn.MSELoss()
示例18
def stft(y, scale='linear'):
    """Magnitude STFT of waveform ``y`` (CUDA window).

    Args:
        y (Tensor): input waveform(s).
        scale (str): 'linear' for raw magnitudes, 'log' for 2 * log(magnitude).

    Returns:
        Tensor: magnitude spectrogram on the requested scale.

    Raises:
        ValueError: if ``scale`` is neither 'linear' nor 'log'.
    """
    # NOTE(review): torch.stft without return_complex is deprecated in recent
    # torch; the trailing real/imag dimension is assumed by .sum(-1) below.
    D = torch.stft(y, n_fft=1024, hop_length=256, win_length=1024, window=torch.hann_window(1024).cuda())
    # Magnitude with a small epsilon so sqrt/log stay away from zero.
    D = torch.sqrt(D.pow(2).sum(-1) + 1e-10)
    if scale == 'linear':
        return D
    elif scale == 'log':
        return 2 * torch.log(torch.clamp(D, 1e-10, float("inf")))
    else:
        # Previously fell through and silently returned None.
        raise ValueError("scale must be 'linear' or 'log', got %r" % (scale,))
# STFT code is adapted from: https://github.com/pseeth/pytorch-stft
示例19
def _power_loss(self, p_y, t_y):
    """Mean squared L2 distance between the STFT magnitude spectra of a
    predicted and a target waveform.

    Args:
        p_y: predicted waveform; reshaped to 1-D of length p_y.shape[0]
            (assumes shape[0] equals numel — verify against callers).
        t_y: target waveform, same shape convention as p_y.
    """
    # NOTE(review): `device` is read from enclosing/global scope, not self.
    # The [:, :, 0/1] indexing assumes the pre-return_complex torch.stft
    # layout where the last dimension holds (real, imag).
    fft_orig = torch.stft(t_y.reshape(t_y.shape[0]), n_fft=512,
                          window=torch.hann_window(window_length=512).to(device))
    fft_pred = torch.stft(p_y.reshape(p_y.shape[0]), n_fft=512,
                          window=torch.hann_window(window_length=512).to(device))
    real_orig = fft_orig[:, :, 0]
    im_org = fft_orig[:, :, 1]
    # Magnitude spectra: sqrt(re^2 + im^2).
    power_orig = torch.sqrt(torch.pow(real_orig, 2) + torch.pow(im_org, 2))
    real_pred = fft_pred[:, :, 0]
    im_pred = fft_pred[:, :, 1]
    power_pred = torch.sqrt(torch.pow(real_pred, 2) + torch.pow(im_pred, 2))
    # Squared L2 norm over the frequency axis, averaged over the rest.
    return torch.sum(torch.pow(torch.norm(torch.abs(power_pred) - torch.abs(power_orig), p=2, dim=1), 2)) / (
        power_pred.shape[0] * power_pred.shape[1])
示例20
def __init__(self, filter_size, block_size):
    """Configure noise filtering: sizes, attenuation and a rolled Hann window.

    Args:
        filter_size: length of the filter and of its Hann window.
        block_size: processing block size (stored for later use).
    """
    super(Generator, self).__init__()
    # NOTE(review): self.apply() runs before any submodules or parameters
    # exist, so init_parameters only visits this bare module here — verify
    # that is intended.
    self.apply(self.init_parameters)
    self.block_size = block_size
    self.filter_size = filter_size
    self.noise_att = 1e-4
    # Hann window rolled by half its length so the peak lands at index 0;
    # registered as a non-trainable Parameter.
    self.filter_window = nn.Parameter(torch.hann_window(filter_size).roll(filter_size//2,-1),requires_grad=False)
    self.filter_coef = None
示例21
def compute_stft(audio, n_fft=1024, win_length=1024, hop_length=256):
    """
    Computes STFT transformation of given audio
    Args:
        audio (Tensor): B x T, batch of audio
    Returns:
        mag (Tensor): STFT magnitudes
        real (Tensor): Real part of STFT transformation result
        im (Tensor): Imaginary part of STFT transformation result
    """
    # NOTE(review): hard-coded CUDA window; fails on CPU-only machines.
    win = torch.hann_window(win_length).cuda()
    # add some padding because torch 4.0 doesn't
    signal_dim = audio.dim()
    extended_shape = [1] * (3 - signal_dim) + list(audio.size())
    # pad = int(self.n_fft // 2)
    pad = win_length
    # Symmetric zero padding of `pad` samples on each side of the time axis.
    audio = F.pad(audio.view(extended_shape), (pad, pad), 'constant')
    audio = audio.view(audio.shape[-signal_dim:])
    # NOTE(review): `fft_size=` is the torch<=0.4 stft keyword; current
    # torch.stft takes n_fft/hop_length/win_length instead, so this call
    # breaks on modern torch — confirm the pinned torch version before reuse.
    stft = torch.stft(audio, win_length, hop_length, fft_size=n_fft, window=win)
    real = stft[:, :, :, 0]
    im = stft[:, :, :, 1]
    # Magnitude: sqrt(re^2 + im^2).
    power = torch.sqrt(torch.pow(real, 2) + torch.pow(im, 2))
    return power, real, im
示例22
def __init__(self, win_length=1024, hop_length=256, n_fft=2048, preemp=True):
    """Set up optional pre-emphasis and a Hann window buffer for the STFT."""
    super(Spectrogram, self).__init__()
    self.win_length = win_length
    self.hop_length = hop_length
    self.n_fft = n_fft
    if not preemp:
        self.preemp = None
    else:
        # Fixed (non-trainable) first-order pre-emphasis implemented as a
        # conv: y[t] = x[t] - 0.97 * x[t-1].
        conv = nn.Conv1d(1, 1, 2, bias=False, padding=1)
        conv.weight.data[0][0][0] = -0.97
        conv.weight.data[0][0][1] = 1.0
        conv.weight.requires_grad = False
        self.preemp = conv
    # Buffer: saved with the model and moved across devices, never trained.
    self.register_buffer('win', torch.hann_window(win_length))
示例23
def stft(y):
    """Return (magnitude, log-magnitude) STFT of ``y`` using a CUDA Hann window."""
    win = torch.hann_window(1024).cuda()
    spec = torch.stft(y, n_fft=1024, hop_length=256, win_length=1024, window=win)
    # Magnitude with a small epsilon so sqrt/log stay away from zero.
    mag = torch.sqrt(spec.pow(2).sum(-1) + 1e-10)
    log_mag = 2 * torch.log(torch.clamp(mag, 1e-10, float("inf")))
    return mag, log_mag
示例24
def stft(y, scale='linear'):
    """Magnitude STFT of waveform ``y`` (rectangular window, n_fft=1024).

    Args:
        y (Tensor): input waveform(s).
        scale (str): 'linear' for raw magnitudes, 'log' for 2 * log(magnitude).

    Returns:
        Tensor: magnitude spectrogram on the requested scale.

    Raises:
        ValueError: if ``scale`` is neither 'linear' nor 'log'.
    """
    # return_complex=True is required by current torch.stft; sqrt(|z|^2 + eps)
    # equals the old sqrt(re^2 + im^2 + eps) over the real/imag layout.
    spec = torch.stft(y, n_fft=1024, hop_length=256, win_length=1024, return_complex=True)
    D = torch.sqrt(spec.abs().pow(2) + 1e-10)
    if scale == 'linear':
        return D
    elif scale == 'log':
        return 2 * torch.log(torch.clamp(D, 1e-10, float("inf")))
    else:
        # Previously fell through and silently returned None.
        raise ValueError("scale must be 'linear' or 'log', got %r" % (scale,))
示例25
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             n_fft=None,
             window="hamming", normalize="per_feature", log=True, center=True,
             dither=constant, pad_to=8, max_duration=16.7,
             frame_splicing=1):
    """Derive STFT framing parameters and build the analysis window.

    Args:
        sample_rate: audio sample rate in Hz.
        window_size: window length in seconds.
        window_stride: hop between windows in seconds.
        n_fft: FFT size; defaults to the next power of two >= win_length.
        window: window name ('hann', 'hamming', 'blackman', 'bartlett',
            or 'none' for no window).
        normalize, log, center, dither, pad_to, frame_splicing: stored
            verbatim for use by the feature-extraction step.
        max_duration: maximum utterance length in seconds, used to precompute
            self.max_length (in frames).
    """
    super(SpectrogramFeatures, self).__init__()
    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }
    self.win_length = int(sample_rate * window_size)
    self.hop_length = int(sample_rate * window_stride)
    # Round the FFT size up to the next power of two unless given explicitly.
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))
    # Unknown window names silently fall back to None (no window) here.
    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    self.window = window_tensor
    self.normalize = normalize
    self.log = log
    self.center = center
    self.dither = dither
    self.pad_to = pad_to
    self.frame_splicing = frame_splicing
    # Maximum number of frames for max_duration seconds of audio, padded up
    # to a multiple of 16.
    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    # NOTE(review): when max_length is already a multiple of 16 this adds a
    # full extra 16 frames — verify that is intended.
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad
示例26
def __init__(
    self,
    n_fft=1024,
    hop_length=256,
    win_length=1024,
    sampling_rate=22050,
    n_mel_channels=80,
    mel_fmin=0.0,
    mel_fmax=None,
):
    """Precompute the Hann window and mel filterbank as module buffers."""
    super().__init__()
    ##############################################
    # FFT Parameters                             #
    ##############################################
    self.n_fft = n_fft
    self.hop_length = hop_length
    self.win_length = win_length
    self.sampling_rate = sampling_rate
    self.n_mel_channels = n_mel_channels
    # Buffers move with the module across devices but are never trained.
    self.register_buffer("window", torch.hann_window(win_length).float())
    basis = librosa_mel_fn(
        sampling_rate, n_fft, n_mel_channels, mel_fmin, mel_fmax
    )
    self.register_buffer("mel_basis", torch.from_numpy(basis).float())
示例27
def __init__(
    self,
    n_fft=4096,
    n_hop=1024,
    center=False
):
    """Store STFT configuration and a fixed (non-trainable) Hann window."""
    super(STFT, self).__init__()
    self.n_fft = n_fft
    self.n_hop = n_hop
    self.center = center
    # A Parameter with requires_grad=False is saved in the state dict and
    # moved across devices, but never updated by the optimizer.
    hann = torch.hann_window(n_fft)
    self.window = nn.Parameter(hann, requires_grad=False)
示例28
def test_InverseMelScale(self):
    """InverseMelScale transform is comparable to that of librosa"""
    n_fft = 2048
    n_mels = 256
    n_stft = n_fft // 2 + 1
    hop_length = n_fft // 4
    # Prepare mel spectrogram input. We use torchaudio to compute one.
    common_utils.set_audio_backend('default')
    sound, sample_rate = _load_audio_asset(
        'steam-train-whistle-daniel_simon.wav', offset=2**10, num_frames=2**14)
    # Mix down to mono, keeping the channel dimension.
    sound = sound.mean(dim=0, keepdim=True)
    spec_orig = F.spectrogram(
        sound, pad=0, window=torch.hann_window(n_fft), n_fft=n_fft,
        hop_length=hop_length, win_length=n_fft, power=2, normalized=False)
    melspec_ta = torchaudio.transforms.MelScale(n_mels=n_mels, sample_rate=sample_rate)(spec_orig)
    melspec_lr = melspec_ta.cpu().numpy().squeeze()
    # Perform InverseMelScale with torch audio and librosa
    spec_ta = torchaudio.transforms.InverseMelScale(
        n_stft, n_mels=n_mels, sample_rate=sample_rate)(melspec_ta)
    spec_lr = librosa.feature.inverse.mel_to_stft(
        melspec_lr, sr=sample_rate, n_fft=n_fft, power=2.0, htk=True, norm=None)
    spec_lr = torch.from_numpy(spec_lr[None, ...])
    # Align dimensions
    # librosa does not return power spectrogram while torchaudio returns power spectrogram
    spec_orig = spec_orig.sqrt()
    spec_ta = spec_ta.sqrt()
    threshold = 2.0
    # This threshold was choosen empirically, based on the following observation
    #
    # torch.dist(spec_lr, spec_ta, p=float('inf'))
    # >>> tensor(1.9666)
    #
    # The spectrograms reconstructed by librosa and torchaudio are not comparable elementwise.
    # This is because they use different approximation algorithms and resulting values can live
    # in different magnitude. (although most of them are very close)
    # See
    # https://github.com/pytorch/audio/pull/366 for the discussion of the choice of algorithm
    # https://github.com/pytorch/audio/pull/448/files#r385747021 for the distribution of P-inf
    # distance over frequencies.
    self.assertEqual(spec_ta, spec_lr, atol=threshold, rtol=1e-5)
    threshold = 1700.0
    # This threshold was choosen empirically, based on the following observations
    #
    # torch.dist(spec_orig, spec_ta, p=1)
    # >>> tensor(1644.3516)
    # torch.dist(spec_orig, spec_lr, p=1)
    # >>> tensor(1420.7103)
    # torch.dist(spec_lr, spec_ta, p=1)
    # >>> tensor(943.2759)
    assert torch.dist(spec_orig, spec_ta, p=1) < threshold
示例29
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             window="hamming", normalize="per_feature", n_fft=None,
             preemph=0.97,
             nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant,
             pad_to=8,
             max_duration=16.7,
             frame_splicing=1):
    """Precompute framing parameters, the STFT window and the mel filterbank.

    Args:
        sample_rate: audio sample rate in Hz.
        window_size: analysis window length in seconds.
        window_stride: hop between windows in seconds.
        window: window name ('hann', 'hamming', 'blackman', 'bartlett',
            or 'none' for no window).
        normalize: feature normalization mode, stored.
        n_fft: FFT size; defaults to the next power of two >= win_length.
        preemph: pre-emphasis coefficient, stored.
        nfilt: number of mel filterbank channels.
        lowfreq / highfreq: mel filterbank range; highfreq defaults to the
            Nyquist frequency.
        log, dither, pad_to, frame_splicing: stored verbatim for the
            feature-extraction step.
        max_duration: maximum utterance length in seconds, used to precompute
            self.max_length (in frames).
    """
    super(FilterbankFeatures, self).__init__()
    # print("PADDING: {}".format(pad_to))
    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }
    self.win_length = int(sample_rate * window_size)  # frame size
    self.hop_length = int(sample_rate * window_stride)
    # Round the FFT size up to the next power of two unless given explicitly.
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))
    self.normalize = normalize
    self.log = log
    self.dither = dither
    self.frame_splicing = frame_splicing
    self.nfilt = nfilt
    self.preemph = preemph
    self.pad_to = pad_to
    # For now, always enable this.
    # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration
    self.use_deterministic_dithering = True
    highfreq = highfreq or sample_rate / 2
    # Unknown window names silently fall back to None (no window) here.
    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    filterbanks = torch.tensor(
        librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq,
                            fmax=highfreq), dtype=torch.float).unsqueeze(0)
    # self.fb = filterbanks
    # self.window = window_tensor
    # Buffers (not parameters): saved with the model, moved across devices.
    self.register_buffer("fb", filterbanks)
    self.register_buffer("window", window_tensor)
    # Calculate maximum sequence length (# frames)
    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    # NOTE(review): when max_length is already a multiple of 16 this adds a
    # full extra 16 frames of padding — verify that is intended.
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad
示例30
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             window="hamming", normalize="per_feature", n_fft=None,
             preemph=0.97,
             nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant,
             pad_to=8,
             max_duration=16.7,
             frame_splicing=1):
    """Precompute framing parameters, the STFT window and the mel filterbank.

    Args:
        sample_rate: audio sample rate in Hz.
        window_size: analysis window length in seconds.
        window_stride: hop between windows in seconds.
        window: window name ('hann', 'hamming', 'blackman', 'bartlett',
            or 'none' for no window).
        normalize: feature normalization mode, stored.
        n_fft: FFT size; defaults to the next power of two >= win_length.
        preemph: pre-emphasis coefficient, stored.
        nfilt: number of mel filterbank channels.
        lowfreq / highfreq: mel filterbank range; highfreq defaults to the
            Nyquist frequency.
        log, dither, pad_to, frame_splicing: stored verbatim for the
            feature-extraction step.
        max_duration: maximum utterance length in seconds, used to precompute
            self.max_length (in frames).
    """
    super(FilterbankFeatures, self).__init__()
    # print("PADDING: {}".format(pad_to))
    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }
    self.win_length = int(sample_rate * window_size)  # frame size
    self.hop_length = int(sample_rate * window_stride)
    # Round the FFT size up to the next power of two unless given explicitly.
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))
    self.normalize = normalize
    self.log = log
    self.dither = dither
    self.frame_splicing = frame_splicing
    self.nfilt = nfilt
    self.preemph = preemph
    self.pad_to = pad_to
    highfreq = highfreq or sample_rate / 2
    # Unknown window names silently fall back to None (no window) here.
    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    filterbanks = torch.tensor(
        librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq,
                            fmax=highfreq), dtype=torch.float).unsqueeze(0)
    # self.fb = filterbanks
    # self.window = window_tensor
    # Buffers (not parameters): saved with the model, moved across devices.
    self.register_buffer("fb", filterbanks)
    self.register_buffer("window", window_tensor)
    # Calculate maximum sequence length (# frames)
    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    # NOTE(review): when max_length is already a multiple of 16 this adds a
    # full extra 16 frames of padding — verify that is intended.
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad