Module: TorchAudio

Defined in:
lib/torchaudio.rb,
lib/torchaudio/version.rb,
lib/torchaudio/functional.rb,
lib/torchaudio/datasets/utils.rb,
lib/torchaudio/datasets/yesno.rb,
lib/torchaudio/transforms/vol.rb,
lib/torchaudio/transforms/fade.rb,
lib/torchaudio/transforms/mfcc.rb,
lib/torchaudio/transforms/mel_scale.rb,
lib/torchaudio/transforms/spectrogram.rb,
lib/torchaudio/transforms/compute_deltas.rb,
lib/torchaudio/transforms/amplitude_to_db.rb,
lib/torchaudio/transforms/mel_spectrogram.rb,
lib/torchaudio/transforms/mu_law_decoding.rb,
lib/torchaudio/transforms/mu_law_encoding.rb

Defined Under Namespace

Modules: Datasets, Functional, Transforms
Classes: Error

Constant Summary collapse

VERSION =
"0.4.0"
F =
Functional

Class Method Summary collapse

Class Method Details

.load(filepath, out: nil, normalization: true, channels_first: true, num_frames: 0, offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil, format: nil) ⇒ Object

TODO remove filetype in 0.4.0



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/torchaudio.rb', line 34

# Reads an audio file into a tensor and returns it together with the sample rate.
#
# @param filepath [#to_s] path of the audio file; converted with +to_s+
# @param out [Object, nil] optional output tensor; validated with +check_input+
#   when given, otherwise a new +Torch::FloatTensor+ is created
# @param normalization passed unchanged to +normalize_audio+
# @param channels_first [Boolean] forwarded to +Ext.read_audio_file+
# @param num_frames [Integer] forwarded to +Ext.read_audio_file+; values below -1 are rejected
# @param offset [Integer] forwarded to +Ext.read_audio_file+; negative values are rejected
# @param signalinfo forwarded to +Ext.read_audio_file+
# @param encodinginfo forwarded to +Ext.read_audio_file+
# @param filetype fallback for +format+ when +format+ is not given
# @param format audio format; defaults to +filetype+, then to the file
#   extension without its leading dot
# @return [Array] two-element array +[out, sample_rate]+
# @raise [ArgumentError] if +filepath+ is not a regular file, +num_frames+ is
#   below -1, or +offset+ is negative
def load(
  filepath, out: nil, normalization: true, channels_first: true, num_frames: 0,
  offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil, format: nil
)

  filepath = filepath.to_s

  # File.file? (unlike File.exist?) is false for directories, which is what
  # the error message below promises.
  unless File.file?(filepath)
    raise ArgumentError, "#{filepath} not found or is a directory"
  end

  # initialize output tensor
  if out.nil?
    out = Torch::FloatTensor.new
  else
    check_input(out)
  end

  if num_frames < -1
    raise ArgumentError, "Expected value for num_samples -1 (entire file) or >=0"
  end
  if offset < 0
    raise ArgumentError, "Expected positive offset value"
  end

  # same logic as C++
  # could also make read_audio_file work with nil
  # (a path without an extension yields nil here, since ""[1..-1] is nil)
  format ||= filetype || File.extname(filepath)[1..-1]

  sample_rate =
    Ext.read_audio_file(
      filepath,
      out,
      channels_first,
      num_frames,
      offset,
      signalinfo,
      encodinginfo,
      format
    )

  # normalize if needed
  normalize_audio(out, normalization)

  [out, sample_rate]
end

.load_wav(filepath, **kwargs) ⇒ Object



82
83
84
85
# File 'lib/torchaudio.rb', line 82

# Delegates to +load+ with the +normalization+ keyword pinned to 1 << 16.
# A +normalization+ value supplied by the caller is overridden; every other
# keyword is forwarded untouched.
#
# @param filepath path handed to +load+
# @param kwargs keyword arguments forwarded to +load+
# @return whatever +load+ returns
def load_wav(filepath, **kwargs)
  load(filepath, **kwargs.merge(normalization: 1 << 16))
end

.save(filepath, src, sample_rate, precision: 16, channels_first: true) ⇒ Object



87
88
89
90
91
92
93
94
95
# File 'lib/torchaudio.rb', line 87

# Builds an +Ext::SignalInfo+ from the tensor and the given parameters, then
# hands off to +save_encinfo+.
#
# @param filepath destination path, forwarded to +save_encinfo+
# @param src tensor to write; must respond to +dim+, +size+ and +numel+
# @param sample_rate stored as the signal info's +rate+
# @param precision stored as the signal info's +precision+
# @param channels_first [Boolean] selects which axis of +src+ is read as the
#   channel count (0 when true, 1 when false)
# @return whatever +save_encinfo+ returns
def save(filepath, src, sample_rate, precision: 16, channels_first: true)
  channel_axis = channels_first ? 0 : 1
  # a 1-D tensor is treated as a single channel
  channel_count = src.dim == 1 ? 1 : src.size(channel_axis)

  info = Ext::SignalInfo.new.tap do |s|
    s.rate = sample_rate
    s.channels = channel_count
    s.length = src.numel
    s.precision = precision
  end

  save_encinfo(filepath, src, channels_first: channels_first, signalinfo: info)
end

.save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/torchaudio.rb', line 97

# Validates and prepares a tensor, then writes it with +Ext.write_audio_file+.
#
# The caller's +src+ tensor is not modified: reshaping, rescaling and
# transposing all operate on new tensors bound to the local variable.
# +signalinfo+, when given, may have its +rate+ and +precision+ coerced in place.
#
# @param filepath destination path; its parent directory must already exist
# @param src tensor to write; 1-D tensors are treated as mono signals
# @param channels_first [Boolean] true when +src+ is laid out C x L; such
#   tensors are transposed to L x C before writing
# @param signalinfo optional object with +rate+ and +precision+ accessors
# @param encodinginfo forwarded unchanged to +Ext.write_audio_file+
# @param filetype used only when +filepath+ has no extension; otherwise the
#   extension (without the dot) wins
# @return whatever +Ext.write_audio_file+ returns
# @raise [RuntimeError] if the destination directory does not exist
# @raise [ArgumentError] if +src+ has more than 2 dimensions or too many
#   channels, or if the rate/precision of +signalinfo+ cannot be coerced
def save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil)
  ch_idx = channels_first ? 0 : 1

  # check if save directory exists
  abs_dirpath = File.dirname(File.expand_path(filepath))
  unless Dir.exist?(abs_dirpath)
    raise "Directory does not exist: #{abs_dirpath}"
  end
  # check_input is defined elsewhere; per the original comment it checks that
  # src is a CPU tensor
  check_input(src)
  # Check/Fix shape of source data
  if src.dim == 1
    # 1d tensors are assumed to be mono signals
    # use the non-mutating unsqueeze so the caller's tensor keeps its shape
    src = src.unsqueeze(ch_idx)
  elsif src.dim > 2 || src.size(ch_idx) > 16
    # NOTE(review): the check allows exactly 16 channels while the message
    # says "C < 16" - confirm which bound is intended
    raise ArgumentError, "Expected format where C < 16, but found #{src.size}"
  end
  # sox stores the sample rate as a float, though practically sample rates are almost always integers
  # convert integers to floats
  if signalinfo
    if signalinfo.rate && !signalinfo.rate.is_a?(Float)
      if signalinfo.rate.to_f == signalinfo.rate
        signalinfo.rate = signalinfo.rate.to_f
      else
        raise ArgumentError, "Sample rate should be a float or int"
      end
    end
    # check if the bit precision (i.e. bits per sample) is an integer
    if signalinfo.precision && !signalinfo.precision.is_a?(Integer)
      if signalinfo.precision.to_i == signalinfo.precision
        signalinfo.precision = signalinfo.precision.to_i
      else
        raise ArgumentError, "Bit precision should be an integer"
      end
    end
  end
  # programs such as librosa normalize the signal, unnormalize if detected
  if src.min >= -1.0 && src.max <= 1.0
    src = src * (1 << 31)
    src = src.long
  end
  # set filetype and allow for files with no extensions
  extension = File.extname(filepath)
  filetype = extension.length > 0 ? extension[1..-1] : filetype
  # transpose from C x L -> L x C
  if channels_first
    src = src.transpose(1, 0)
  end
  # save data to file
  src = src.contiguous
  Ext.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
end