Module: TorchAudio
- Defined in:
- lib/torchaudio.rb,
lib/torchaudio/version.rb,
lib/torchaudio/functional.rb,
lib/torchaudio/datasets/utils.rb,
lib/torchaudio/datasets/yesno.rb,
lib/torchaudio/transforms/vol.rb,
lib/torchaudio/transforms/fade.rb,
lib/torchaudio/transforms/mfcc.rb,
lib/torchaudio/transforms/mel_scale.rb,
lib/torchaudio/transforms/spectrogram.rb,
lib/torchaudio/transforms/compute_deltas.rb,
lib/torchaudio/transforms/amplitude_to_db.rb,
lib/torchaudio/transforms/mel_spectrogram.rb,
lib/torchaudio/transforms/mu_law_decoding.rb,
lib/torchaudio/transforms/mu_law_encoding.rb
Defined Under Namespace
Modules: Datasets, Functional, Transforms Classes: Error
Constant Summary collapse
- VERSION =
"0.4.0"
- F =
Functional
Class Method Summary collapse
-
.load(filepath, out: nil, normalization: true, channels_first: true, num_frames: 0, offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil, format: nil) ⇒ Object
TODO remove filetype in 0.4.0.
- .load_wav(filepath, **kwargs) ⇒ Object
- .save(filepath, src, sample_rate, precision: 16, channels_first: true) ⇒ Object
- .save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
Class Method Details
.load(filepath, out: nil, normalization: true, channels_first: true, num_frames: 0, offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil, format: nil) ⇒ Object
TODO remove filetype in 0.4.0
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/torchaudio.rb', line 34

# Reads an audio file into a tensor and returns it with the sample rate.
#
# @param filepath [#to_s] path of the file to read (converted with +to_s+)
# @param out [Object, nil] optional output tensor; when given it is passed
#   through +check_input+, otherwise a new +Torch::FloatTensor+ is created
# @param normalization [Object] forwarded unchanged to +normalize_audio+
# @param channels_first [Boolean] forwarded to +Ext.read_audio_file+
# @param num_frames [Integer] must be >= -1; forwarded to +Ext.read_audio_file+
# @param offset [Integer] must be >= 0; forwarded to +Ext.read_audio_file+
# @param signalinfo [Object, nil] forwarded to +Ext.read_audio_file+
# @param encodinginfo [Object, nil] forwarded to +Ext.read_audio_file+
# @param filetype [String, nil] legacy fallback, used only when +format+ is nil
# @param format [String, nil] audio format; defaults to +filetype+, then to the
#   file extension without its leading dot
# @return [Array] two-element array +[out, sample_rate]+
# @raise [ArgumentError] if +filepath+ does not exist or is a directory,
#   if +num_frames+ is below -1, or if +offset+ is negative
def load(
  filepath,
  out: nil,
  normalization: true,
  channels_first: true,
  num_frames: 0,
  offset: 0,
  signalinfo: nil,
  encodinginfo: nil,
  filetype: nil,
  format: nil
)
  filepath = filepath.to_s

  # check if valid file
  # File.exist? alone is also true for directories, so reject those
  # explicitly to honour what the error message promises.
  if !File.exist?(filepath) || File.directory?(filepath)
    raise ArgumentError, "#{filepath} not found or is a directory"
  end

  # initialize output tensor
  if out.nil?
    out = Torch::FloatTensor.new
  else
    check_input(out)
  end

  # NOTE(review): the message says "num_samples" but the keyword is
  # num_frames; text left untouched in case callers match on it.
  if num_frames < -1
    raise ArgumentError, "Expected value for num_samples -1 (entire file) or >=0"
  end
  if offset < 0
    raise ArgumentError, "Expected positive offset value"
  end

  # same logic as C++
  # could also make read_audio_file work with nil
  # (a path without an extension leaves format as nil here)
  format ||= filetype || File.extname(filepath)[1..-1]

  sample_rate = Ext.read_audio_file(
    filepath,
    out,
    channels_first,
    num_frames,
    offset,
    signalinfo,
    encodinginfo,
    format
  )

  # normalize if needed
  normalize_audio(out, normalization)

  [out, sample_rate]
end
.load_wav(filepath, **kwargs) ⇒ Object
82 83 84 85 |
# File 'lib/torchaudio.rb', line 82

# Delegates to +load+ with the +:normalization+ option forced to +1 << 16+.
# Any +:normalization+ value supplied by the caller is overwritten; every
# other keyword is forwarded as given.
#
# @param filepath [Object] forwarded to +load+
# @param kwargs [Hash] keyword options forwarded to +load+
# @return [Object] whatever +load+ returns
def load_wav(filepath, **kwargs)
  load(filepath, **kwargs.merge(normalization: 1 << 16))
end
.save(filepath, src, sample_rate, precision: 16, channels_first: true) ⇒ Object
87 88 89 90 91 92 93 94 95 |
# File 'lib/torchaudio.rb', line 87

# Builds an +Ext::SignalInfo+ describing +src+ and hands the write off to
# +save_encinfo+.
#
# @param filepath [Object] forwarded to +save_encinfo+
# @param src [Object] tensor-like object responding to +dim+, +size+ and +numel+
# @param sample_rate [Numeric] stored as the signal info's rate
# @param precision [Integer] stored as the signal info's precision
# @param channels_first [Boolean] selects which axis of +src+ holds channels
#   (axis 0 when true, axis 1 otherwise)
# @return [Object] whatever +save_encinfo+ returns
def save(filepath, src, sample_rate, precision: 16, channels_first: true)
  channel_axis = channels_first ? 0 : 1

  info = Ext::SignalInfo.new
  info.rate = sample_rate
  # a 1-D source is treated as a single channel
  info.channels =
    if src.dim == 1
      1
    else
      src.size(channel_axis)
    end
  info.length = src.numel
  info.precision = precision

  save_encinfo(filepath, src, channels_first: channels_first, signalinfo: info)
end
.save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/torchaudio.rb', line 97

# Validates +src+ and the optional signal info, then writes the data with
# +Ext.write_audio_file+.
#
# @param filepath [String] destination path; its parent directory must exist
# @param src [Object] tensor to write; a 1-D tensor is unsqueezed in place
#   along the channel axis
# @param channels_first [Boolean] when true the channel axis is 0 and +src+
#   is transposed before writing; otherwise the channel axis is 1
# @param signalinfo [Object, nil] optional signal info; an integral +rate+ is
#   converted to Float and an integral-valued +precision+ to Integer, in place
# @param encodinginfo [Object, nil] forwarded to +Ext.write_audio_file+
# @param filetype [String, nil] used only when +filepath+ has no extension
# @return [Object] whatever +Ext.write_audio_file+ returns
# @raise [RuntimeError] if the destination directory does not exist
# @raise [ArgumentError] if +src+ has more than 2 dimensions or more than 16
#   channels, or if the rate / precision cannot be converted losslessly
def save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil)
  ch_idx = channels_first ? 0 : 1

  # check if save directory exists
  # (expand first so a bare relative filename resolves against the cwd)
  abs_dirpath = File.dirname(File.expand_path(filepath))
  unless Dir.exist?(abs_dirpath)
    raise "Directory does not exist: #{abs_dirpath}"
  end

  # check that src is a CPU tensor
  # (check_input is defined elsewhere; behaviour taken from the comment above)
  check_input(src)

  # Check/Fix shape of source data
  if src.dim == 1
    # 1d tensors as assumed to be mono signals
    src.unsqueeze!(ch_idx)
  elsif src.dim > 2 || src.size(ch_idx) > 16
    # assumes num_channels < 16
    # NOTE(review): the guard admits exactly 16 channels although the
    # message says "C < 16" - confirm which bound is intended.
    raise ArgumentError, "Expected format where C < 16, but found #{src.size}"
  end

  # sox stores the sample rate as a float, though practically sample rates are almost always integers
  # convert integers to floats
  if signalinfo
    if signalinfo.rate && !signalinfo.rate.is_a?(Float)
      if signalinfo.rate.to_f == signalinfo.rate
        signalinfo.rate = signalinfo.rate.to_f
      else
        raise ArgumentError, "Sample rate should be a float or int"
      end
    end

    # check if the bit precision (i.e. bits per sample) is an integer
    if signalinfo.precision && !signalinfo.precision.is_a?(Integer)
      if signalinfo.precision.to_i == signalinfo.precision
        signalinfo.precision = signalinfo.precision.to_i
      else
        raise ArgumentError, "Bit precision should be an integer"
      end
    end
  end

  # programs such as librosa normalize the signal, unnormalize if detected
  if src.min >= -1.0 && src.max <= 1.0
    src = src * (1 << 31)
    src = src.long
  end

  # set filetype and allow for files with no extensions
  extension = File.extname(filepath)
  filetype = extension.length > 0 ? extension[1..-1] : filetype

  # transpose from C x L -> L x C
  if channels_first
    src = src.transpose(1, 0)
  end

  # save data to file
  src = src.contiguous
  Ext.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
end