Class: TorchAudio::Transforms::MelScale

Inherits:
Torch::NN::Module
  • Object
show all
Defined in:
lib/torchaudio/transforms/mel_scale.rb

Instance Method Summary collapse

Constructor Details

#initialize(n_mels: 128, sample_rate: 16000, f_min: 0.0, f_max: nil, n_stft: nil) ⇒ MelScale

Returns a new instance of MelScale.

Raises:

  • (ArgumentError) — if f_min is greater than f_max


4
5
6
7
8
9
10
11
12
13
14
15
# File 'lib/torchaudio/transforms/mel_scale.rb', line 4

# Sets up the mel-scale transform and registers its filterbank buffer.
#
# @param n_mels [Integer] number of mel bins handed to F.create_fb_matrix
# @param sample_rate [Integer] sample rate; also determines the default f_max
# @param f_min [Float] lower frequency bound
# @param f_max [Float, nil] upper frequency bound; when nil it defaults to
#   sample_rate.div(2).to_f
# @param n_stft [Integer, nil] number of STFT bins; when nil an empty
#   placeholder buffer is registered and #forward fills it on first use
# @raise [ArgumentError] if f_min is greater than f_max
def initialize(n_mels: 128, sample_rate: 16000, f_min: 0.0, f_max: nil, n_stft: nil)
  super()
  @n_mels = n_mels
  @sample_rate = sample_rate
  @f_max = f_max || sample_rate.div(2).to_f
  @f_min = f_min

  # Equal bounds are accepted, so the message states the non-strict
  # requirement that is actually enforced.
  unless f_min <= @f_max
    raise ArgumentError, format("Require f_min: %f <= f_max: %f", f_min, @f_max)
  end

  # Without n_stft the filterbank size is unknown here; register an empty
  # tensor so #forward can detect it (numel == 0) and build it lazily.
  fb =
    if n_stft.nil?
      Torch.empty(0)
    else
      F.create_fb_matrix(n_stft, @f_min, @f_max, @n_mels, @sample_rate)
    end
  register_buffer("fb", fb)
end

Instance Method Details

#forward(specgram) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/torchaudio/transforms/mel_scale.rb', line 17

# Applies the mel filterbank to a spectrogram.
#
# The input is reshaped to three dimensions using its last two dimensions,
# multiplied by the filterbank, and reshaped back so the original leading
# dimensions are restored.
#
# @param specgram [Torch::Tensor] spectrogram whose last two dimensions are
#   treated as (frequency, time), per the matmul below
# @return [Torch::Tensor] result with the leading dimensions of the input
#   followed by the last two dimensions of the matmul result
def forward(specgram)
  original_size = specgram.size
  # pack batch: collapse every leading dimension into one
  batched = specgram.reshape(-1, original_size[-2], original_size[-1])

  if @fb.numel.zero?
    # The registered buffer is filled in place rather than reassigned
    # (mirrors the "attributes cannot be reassigned outside __init__"
    # workaround).
    filterbank = F.create_fb_matrix(batched.size(1), @f_min, @f_max, @n_mels, @sample_rate)
    @fb.resize!(filterbank.size)
    @fb.copy!(filterbank)
  end

  # (channel, frequency, time) -> (channel, time, frequency)
  time_major = batched.transpose(1, 2)
  # (channel, time, frequency) dot (frequency, n_mels) -> (channel, time, n_mels)
  projected = Torch.matmul(time_major, @fb)
  # -> (channel, n_mels, time)
  mel = projected.transpose(1, 2)

  # unpack batch: restore the leading dimensions of the input
  leading_dims = original_size[0...-2]
  trailing_dims = mel.shape[-2..-1]
  mel.reshape(leading_dims + trailing_dims)
end