Top Level Namespace

Defined Under Namespace

Modules: CMath, Math, Noyes, NoyesC, NoyesFilterDSL, NoyesJava Classes: Array, Mfcc, MockNoyesServer

Constant Summary collapse

TMAGIC =
'1.0 talkhouse'
TSTART =

The following constants are packed as 4 byte big-endian integers.

[0].pack('N')
TPCM =
[1].pack('N')
TEND =
[2].pack('N')
TBYE =
[3].pack('N')
TCEPSTRA =
[4].pack('N')
TA16_16 =
[5].pack('N')
TA16_44 =
[6].pack('N')
FEAT8M16R =

Parameters for 8 kHz models and 16 kHz data

[32, 200, 3700, 256*2, 8000*2, 80*2, 205*2]
FEAT16M16R =

Parameters for 16 kHz models and 16 kHz data

[40, 133.33, 6855.5, 512, 8000*2, 80*2, 205*2]

Instance Method Summary collapse

Instance Method Details

#file2features(file, format = FEAT8M16R) ⇒ Object

Convenience function for converting almost any type of audio file to an mfcc feature array.


39
40
41
42
# File 'lib/common/file2feat.rb', line 39

# Convenience wrapper that turns an audio file into a feature array by
# feeding the stream built by file2fstream straight into stream2features.
#
# @param file   audio file handed on to file2fstream
# @param format front-end parameter list; defaults to FEAT8M16R
# @return whatever stream2features returns for the generated stream
def file2features(file, format = FEAT8M16R)
  stream2features(file2fstream(file, format))
end

#file2fstream(file, format = FEAT8M16R) ⇒ Object

Convert audio file into an IO object with features.


4
5
6
7
8
9
# File 'lib/common/file2feat.rb', line 4

# Convert an audio file into an IO object holding its feature stream.
#
# The file is pushed through send_incremental_features with an in-memory
# buffer standing in for the server connection; the bytes that would have
# been sent are returned as a fresh StringIO positioned at the start.
#
# @param file   audio file handed on to send_incremental_features
# @param format front-end parameter list; defaults to FEAT8M16R
# @return [StringIO] readable stream containing everything that was written
def file2fstream(file, format = FEAT8M16R)
  # Start from an empty binary buffer. The first argument of StringIO.new is
  # the initial *content*, not an open mode, so passing 'wb' here would seed
  # the stream with the two bytes "wb".
  to_server = StringIO.new(String.new)
  # send_incremental_features reads a reply from from_server; this canned
  # text gives it something to consume.
  from_server = StringIO.new 'dummy result'
  send_incremental_features file, to_server, from_server, 16, format
  StringIO.new to_server.string
end

#file2pcm(file, bits, freq) ⇒ Object


1
2
3
4
5
6
7
8
9
# File 'lib/common/file2pcm.rb', line 1

require 'shellwords'

# Decode an audio file to an array of signed sample values (as Floats) by
# shelling out to sox and asking for raw output on stdout.
#
# @param file path of the audio file; shell-escaped before being placed on
#   the sox command line so names with spaces or metacharacters are safe
# @param bits sample width passed to sox (-b) and used for sign conversion
# @param freq sample rate passed to sox (-r)
# @return [Array<Float>] one Float per decoded word
#
# NOTE(review): the output is always unpacked as 16-bit big-endian words
# ('n*'), so values of bits other than 16 are probably not handled
# correctly -- confirm before relying on them.
def file2pcm(file, bits, freq)
  raw = `sox #{Shellwords.escape(file.to_s)} -s -B -r #{freq} -b #{bits} -t raw -`
  length = bits.to_i # bits
  max = 2**length - 1
  mid = 2**(length - 1)
  # 'n' unpacks unsigned words; fold the upper half of the range back to
  # negative numbers (two's complement).
  to_signed = ->(n) { n >= mid ? -((n ^ max) + 1) : n }
  raw.unpack('n*').map { |d| to_signed[d].to_f }
end

#send_incremental_features(file, to_server, from_server, bits, freqinfo) ⇒ Object

Use sox to convert a file of almost any common type into PCM. Not sure this works for anything besides 16 bits. Takes a file and two IO-like objects.


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/common/send_incrementally.rb', line 26

# Decode an audio file, run it through the feature front end slice by slice
# and write the resulting frames to to_server in the talkhouse framing
# (TMAGIC, TSTART, one TCEPSTRA record per slice, TEND, TBYE). Afterwards a
# length-prefixed reply is read from from_server, if one is available.
#
# @param file        audio file handed to file2pcm
# @param to_server   IO-like object that receives the feature stream
# @param from_server IO-like object the reply is read from, or nil to skip
# @param bits        sample width handed to file2pcm
# @param freqinfo    array unpacked as
#   [nfilt, min_freq, max_freq, nfft, freq, shift, frame_size]
# @return [Hash] :audio_length, :process_time and :latency (seconds), plus
#   :transcript when a complete reply header was received
def send_incremental_features(file, to_server, from_server, bits, freqinfo)
  stats = {}
  nfilt, min_freq, max_freq, nfft, freq, shift, frame_size = *freqinfo
  preemphasizer = Preemphasizer.new 0.97
  segmenter = Segmenter.new frame_size, shift
  hamming_windower = HammingWindow.new frame_size
  power_spectrum_filter = PowerSpectrumFilter.new nfft
  mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
  compressor = LogCompressor.new
  discrete_cosine_transform = DCT.new 13, nfilt
  live_cmn = LiveCMN.new
  pcm = file2pcm file, bits, freq
  stats[:audio_length] = pcm.size / freq.to_f
  to_server.write TMAGIC
  to_server.write TSTART
  stats[:process_time] = 0
  pcm.each_slice 1230 do |data|
    process_time_start = Time.new
    data >>= preemphasizer
    data >>= segmenter
    # The segmenter may yield nothing for a slice; skip it in that case.
    next unless data
    data >>= hamming_windower
    data >>= power_spectrum_filter
    data >>= mel_filter
    data >>= compressor
    data >>= discrete_cosine_transform
    data >>= live_cmn
    stats[:process_time] += Time.new - process_time_start
    # One record per slice: tag, frame count, then each frame as
    # big-endian single-precision floats.
    to_server.write TCEPSTRA
    to_server.write [data.size].pack('N')
    data.each { |cmn| to_server.write cmn.pack('g*') }
    to_server.flush
  end
  to_server.write TEND
  to_server.write TBYE
  to_server.flush
  latency_start = Time.new
  if from_server
    header = from_server.read(4)
    # read returns nil at EOF and may return fewer than 4 bytes; without a
    # full length prefix there is no transcript to read.
    if header && header.bytesize == 4
      size = header.unpack('N')[0]
      stats[:transcript] = from_server.read size
    end
  end
  stats[:latency] = Time.new - latency_start
  stats
end

#send_incremental_pcm(file, to_server, from_server, depth, rate) ⇒ Object


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/common/send_incrementally.rb', line 72

require 'shellwords'

# Decode an audio file with sox and send the raw PCM bytes to to_server in
# chunks of at most 1024 bytes, framed as TMAGIC, TSTART, one TA16_16 record
# per chunk (tag, count of 2-byte samples, payload), TEND, TBYE.
#
# @param file        path of the audio file; shell-escaped for sox
# @param to_server   IO-like object that receives the stream
# @param from_server IO-like object the reply is read from
# @param depth       sample width passed to sox (-b)
# @param rate        sample rate passed to sox (-r)
# @return whatever from_server.read returns once the stream has been sent
#
# NOTE(review): every chunk is tagged TA16_16 regardless of depth and rate;
# confirm that is intended for inputs other than 16-bit / 16 kHz.
def send_incremental_pcm(file, to_server, from_server, depth, rate)
  raw = `sox #{Shellwords.escape(file.to_s)} -s -B -r #{rate} -b #{depth} -t raw -`
  # Command output carries the default external encoding; treat it as bytes
  # so chunk boundaries and sample counts are not skewed by byte runs that
  # happen to look like multibyte characters.
  raw.force_encoding(Encoding::BINARY)
  to_server.write TMAGIC
  to_server.write TSTART
  (0...raw.bytesize).step(1024) do |offset|
    chunk = raw.byteslice(offset, 1024)
    to_server.write TA16_16
    to_server.write([chunk.bytesize / 2].pack('N'))
    to_server.write chunk
    to_server.flush
  end
  to_server.write TEND
  to_server.write TBYE
  to_server.flush
  from_server.read
end

#stream2features(stream) ⇒ Object

Take a talkhouse feature stream and convert it into an array.


12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/common/file2feat.rb', line 12

# Take a talkhouse feature stream and convert it into an array.
#
# The stream must start with TMAGIC followed by TSTART. Records are then
# read by their 4-byte tag until TBYE or the end of the stream:
# TCEPSTRA records contribute frames of 13 big-endian floats each, TPCM
# payloads are skipped, and TEND or unrecognised tags are ignored.
# The collected frames are passed through Noyes::DoubleDeltaFilter and each
# resulting entry is flattened.
#
# @param stream IO-like object positioned at the start of the stream
# @return [Array<Array>] one flattened entry per filtered observation
# @raise [RuntimeError] if the magic number or TSTART marker is missing
def stream2features(stream)
  observations = []
  raise "Unexpected magic number." if stream.read(TMAGIC.size) != TMAGIC
  raise "Expected TSTART."         if stream.read(4) != TSTART
  loop do
    tag = stream.read(4)
    # read returns nil at EOF; stop there too so a stream that was cut off
    # before TBYE cannot spin forever.
    break if tag.nil? || tag == TBYE

    case tag
    when TPCM
      count = stream.read(4).unpack('N')[0]
      # PCM payload is not used for features; consume it to stay in sync.
      stream.read count
    when TCEPSTRA
      count = stream.read(4).unpack('N')[0]
      observations.concat(Array.new(count) { stream.read(13 * 4).unpack('g*') })
    end
  end
  delta_filter = Noyes::DoubleDeltaFilter.new
  observations >>= delta_filter
  observations.map { |a| a.flatten }
end

#to_signed_short(n) ⇒ Object

Converts from unsigned to signed short. Ruby, strangely enough, doesn't have network byte order short conversion for signed shorts.


13
14
15
16
17
18
# File 'lib/common/noyes_math.rb', line 13

# Reinterprets an unsigned 16-bit value as a two's-complement signed short.
# Values below the sign bit are returned unchanged.
#
# @param n [Integer] unsigned value, e.g. from unpack('n')
# @return [Integer] the signed interpretation of n
def to_signed_short(n)
  sign_bit = 1 << 15
  all_ones = (1 << 16) - 1
  return n unless n >= sign_bit

  -((n ^ all_ones) + 1)
end