Module: Informers::Utils
- Defined in:
- lib/informers/utils/hub.rb,
lib/informers/processors.rb,
lib/informers/tokenizers.rb,
lib/informers/utils/core.rb,
lib/informers/utils/math.rb,
lib/informers/utils/audio.rb,
lib/informers/utils/image.rb,
lib/informers/utils/ffmpeg.rb,
lib/informers/utils/tensor.rb,
lib/informers/utils/generation.rb
Defined Under Namespace
Modules: Hub
Classes: BeamSearchSampler, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, GenerationConfig, GreedySampler, LogitsProcessor, LogitsProcessorList, MinLengthLogitsProcessor, NoRepeatNGramLogitsProcessor, RawImage, Sampler
Class Method Summary
collapse
-
._build_translation_inputs(slf, raw_inputs, tokenizer_options, generate_kwargs) ⇒ Object
-
.calculate_reflect_offset(i, w) ⇒ Object
-
.center_to_corners_format(v) ⇒ Object
-
.dims(tensor) ⇒ Object
-
.dispatch_callback(progress_callback, data) ⇒ Object
-
.ffmpeg_read(data, sampling_rate) ⇒ Object
Adapted from the Transformers Python library.
-
.get_top_items(items, top_k = 0) ⇒ Object
-
.interpolate(input, shape, mode = "bilinear", align_corners = false) ⇒ Object
-
.interpolate_data(input, in_shape, out_shape, mode = "bilinear", align_corners = false) ⇒ Object
-
.max(arr) ⇒ Object
-
.mean_pooling(last_hidden_state, attention_mask) ⇒ Object
-
.normalize(result) ⇒ Object
-
.ones_like(tensor) ⇒ Object
-
.post_process_object_detection(outputs, threshold = 0.5, target_sizes = nil, is_zero_shot = false) ⇒ Object
-
.read_audio(input, sampling_rate) ⇒ Object
-
.reshape(arr, dims) ⇒ Object
-
.sigmoid(arr) ⇒ Object
-
.softmax(arr) ⇒ Object
-
.stack(tensors, dim = 0) ⇒ Object
Class Method Details
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
|
# File 'lib/informers/tokenizers.rb', line 216
# Validates a translation tokenizer's language settings, stores the forced
# BOS token id for the target language, and tokenizes the inputs.
#
# @param slf [Object] tokenizer; must respond to `language_codes` (Array),
#   `language_regex` (Regexp) and `lang_to_token` (callable). The regex is
#   only validated here, not otherwise used.
# @param raw_inputs [Object] inputs passed unchanged to the tokenizer
# @param tokenizer_options [Hash] splatted as keyword arguments to the tokenizer
# @param generate_kwargs [Hash] `:src_lang` and `:tgt_lang` are read from it;
#   it is mutated to hold the `"forced_bos_token_id"` entry (string key)
# @return [Object] the result of calling the tokenizer
# @raise [Error] if a required attribute is missing/invalid or a language
#   code is not listed in `language_codes`
def self._build_translation_inputs(slf, raw_inputs, tokenizer_options, generate_kwargs)
  unless slf.respond_to?(:language_codes) && slf.language_codes.is_a?(Array)
    raise Error, "Tokenizer must have `language_codes` attribute set and it should be an array of language ids."
  end
  unless slf.respond_to?(:language_regex) && slf.language_regex.is_a?(Regexp)
    raise Error, "Tokenizer must have `language_regex` attribute set and it should be a regular expression."
  end
  unless slf.respond_to?(:lang_to_token) && slf.lang_to_token.respond_to?(:call)
    raise Error, "Tokenizer must have `lang_to_token` attribute set and it should be a function."
  end

  source_code = generate_kwargs[:src_lang]
  target_code = generate_kwargs[:tgt_lang]

  # The target language is mandatory; the source language is only checked
  # when the caller supplied one.
  unless slf.language_codes.include?(target_code)
    raise Error, "Target language code #{target_code.inspect} is not valid. Must be one of: #{slf.language_codes.join(", ")}"
  end
  if !source_code.nil? && !slf.language_codes.include?(source_code)
    raise Error, "Source language code #{source_code.inspect} is not valid. Must be one of: #{slf.language_codes.join(", ")}"
  end

  # Record the id of the target-language token for the generation step.
  target_token = slf.lang_to_token.call(target_code)
  generate_kwargs["forced_bos_token_id"] = slf.convert_tokens_to_ids([target_token])[0]

  slf.call(raw_inputs, **tokenizer_options)
end
|
.calculate_reflect_offset(i, w) ⇒ Object
7
8
9
|
# File 'lib/informers/utils/core.rb', line 7
# Folds an index into a mirrored ("reflect") range.
#
# @param i [Integer] index to fold
# @param w [Integer] reflection width; for positive `w` the result is in 0..w
# @return [Integer] the reflected offset
def self.calculate_reflect_offset(i, w)
  period = 2 * w
  wrapped = (i + w) % period
  (wrapped - w).abs
end
|
658
659
660
661
662
663
664
665
666
|
# File 'lib/informers/processors.rb', line 658
# Converts a box given as center point plus size into corner coordinates.
#
# @param v [Array<Numeric>] `[center_x, center_y, width, height]`
# @return [Array<Float>] `[x_min, y_min, x_max, y_max]`
def self.center_to_corners_format(v)
  center_x, center_y, box_width, box_height = v
  half_width = box_width / 2.0
  half_height = box_height / 2.0
  [
    center_x - half_width,
    center_y - half_height,
    center_x + half_width,
    center_y + half_height
  ]
end
|
.dims(tensor) ⇒ Object
37
38
39
40
41
42
43
44
|
# File 'lib/informers/utils/tensor.rb', line 37
# Returns the shape of a nested array by following the first element of
# each nesting level.
#
# @param tensor [Array, Object] nested array (a non-array yields `[]`)
# @return [Array<Integer>] size of each nesting level, outermost first
def self.dims(tensor)
  shape = []
  level = tensor
  loop do
    break unless level.is_a?(Array)

    shape.push(level.size)
    # Only the first element is inspected; ragged input is not detected.
    level = level[0]
  end
  shape
end
|
.dispatch_callback(progress_callback, data) ⇒ Object
3
4
5
|
# File 'lib/informers/utils/core.rb', line 3
# Invokes a progress callback with `data` when a callback was supplied.
#
# @param progress_callback [#call, nil, false] callback; skipped when falsy
# @param data [Object] payload handed to the callback
# @return [Object, nil] the callback's return value, or nil when skipped
def self.dispatch_callback(progress_callback, data)
  return unless progress_callback

  progress_callback.call(data)
end
|
.ffmpeg_read(data, sampling_rate) ⇒ Object
Adapted from the Transformers Python library.
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
# File 'lib/informers/utils/ffmpeg.rb', line 18
# Decodes encoded audio bytes into mono float samples by piping them through
# the `ffmpeg` executable (stdin -> stdout).
#
# @param data [String] encoded audio bytes written to ffmpeg's stdin
# @param sampling_rate [Integer] output sample rate, passed to ffmpeg's `-ar`
# @return [Array<Float>] samples unpacked from little-endian 32-bit floats
# @raise [Error] if the `ffmpeg` executable cannot be found, or if ffmpeg
#   exits with a non-zero status
def self.ffmpeg_read(data, sampling_rate)
  ffmpeg_command = [
    "ffmpeg",
    "-i", "pipe:0",
    "-ac", "1",               # downmix to a single channel
    "-ar", sampling_rate.to_s,
    "-f", "f32le",            # raw little-endian float32 output
    "-hide_banner",
    "-loglevel", "quiet",
    "pipe:1"
  ]

  begin
    # binmode keeps the audio bytes untouched on both pipes.
    stdout, status = Open3.capture2(*ffmpeg_command, stdin_data: data, binmode: true)
  rescue Errno::ENOENT
    # Spawning fails with ENOENT when the executable is not on PATH; a
    # non-zero exit status would never be reached in that case.
    raise Error, "ffmpeg was not found but is required to load audio files from filename"
  end

  unless status.success?
    raise Error, "ffmpeg exited with status #{status.exitstatus.inspect}; the audio data may be malformed or in an unsupported format"
  end

  # "e*" = sequence of little-endian single-precision floats.
  stdout.unpack("e*")
end
|
.get_top_items(items, top_k = 0) ⇒ Object
96
97
98
99
100
101
102
103
104
105
106
107
108
|
# File 'lib/informers/utils/math.rb', line 96
# Ranks values from highest to lowest, keeping track of original positions.
#
# @param items [Array<Numeric>] values to rank
# @param top_k [Integer, nil] number of entries to keep; nil or a value
#   that is not greater than 0 keeps every entry
# @return [Array<Array>] `[index, value]` pairs sorted by descending value
def self.get_top_items(items, top_k = 0)
  indexed = items.each_with_index.map { |value, position| [position, value] }
  ranked = indexed.sort_by { |pair| -pair[1] }
  return ranked if top_k.nil? || !(top_k > 0)

  ranked[0, top_k]
end
|
.interpolate(input, shape, mode = "bilinear", align_corners = false) ⇒ Object
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'lib/informers/utils/tensor.rb', line 46
# Resizes a nested-array image to a new height and width.
#
# The trailing three dimensions of `input` are read as channels, height and
# width; when there is no third-from-last dimension a single channel is used.
#
# @param input [Array] nested array of pixel values
# @param shape [Array<Integer>] `[out_height, out_width]`
# @param mode [String] forwarded to `interpolate_data`
# @param align_corners [Boolean] forwarded to `interpolate_data`
# @return [Array] nested array reshaped to `[channels, out_height, out_width]`
def self.interpolate(input, shape, mode = "bilinear", align_corners = false)
  target_height, target_width = shape

  # Compute the input shape once and pick the trailing dimensions from it.
  input_shape = dims(input)
  channels = input_shape[-3] || 1
  source_height = input_shape[-2]
  source_width = input_shape[-1]

  resized = interpolate_data(
    input.flatten,
    [channels, source_height, source_width],
    [target_height, target_width],
    mode,
    align_corners
  )

  reshape(resized, [channels, target_height, target_width])
end
|
.interpolate_data(input, in_shape, out_shape, mode = "bilinear", align_corners = false) ⇒ Object
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# File 'lib/informers/utils/math.rb', line 3
# Bilinearly resamples flat, channel-major image data to a new size.
#
# NOTE: `mode` and `align_corners` are accepted but not consulted by this
# implementation; sampling is always bilinear with half-pixel centers.
#
# @param input [Array<Numeric>] flat pixel data laid out channel by channel,
#   each channel stored row by row
# @param in_shape [Array<Integer>] `[channels, height, width]` of `input`
# @param out_shape [Array<Integer>] `[height, width]` of the result
# @param mode [String] unused
# @param align_corners [Boolean] unused
# @return [Array<Float>] flat data of size `channels * out_height * out_width`
def self.interpolate_data(input, in_shape, out_shape, mode = "bilinear", align_corners = false)
  channels, src_h, src_w = in_shape
  dst_h, dst_w = out_shape

  scale_x = dst_w / src_w.to_f
  scale_y = dst_h / src_h.to_f

  src_plane = src_h * src_w
  dst_plane = dst_h * dst_w
  result = Array.new(dst_plane * channels)

  (0...dst_h).each do |row|
    # Vertical source position; it only depends on the output row.
    src_y = (row + 0.5) / scale_y - 0.5
    top = src_y.floor
    bottom = [top + 1, src_h - 1].min
    top = [top, 0].max
    frac_y = src_y - top
    top_base = top * src_w
    bottom_base = bottom * src_w

    (0...dst_w).each do |col|
      src_x = (col + 0.5) / scale_x - 0.5
      left = src_x.floor
      right = [left + 1, src_w - 1].min
      left = [left, 0].max
      frac_x = src_x - left

      # Weights of the four neighbouring source pixels.
      w_top_left = (1 - frac_x) * (1 - frac_y)
      w_top_right = frac_x * (1 - frac_y)
      w_bottom_left = (1 - frac_x) * frac_y
      w_bottom_right = frac_x * frac_y

      top_left = top_base + left
      top_right = top_base + right
      bottom_left = bottom_base + left
      bottom_right = bottom_base + right

      dst_index = row * dst_w + col

      channels.times do |channel|
        src_base = channel * src_plane
        result[channel * dst_plane + dst_index] =
          w_top_left * input[src_base + top_left] +
          w_top_right * input[src_base + top_right] +
          w_bottom_left * input[src_base + bottom_left] +
          w_bottom_right * input[src_base + bottom_right]
      end
    end
  end

  result
end
|
.max(arr) ⇒ Object
110
111
112
113
114
115
|
# File 'lib/informers/utils/math.rb', line 110
# Finds the largest element of an array together with its position.
#
# @param arr [Array<Comparable>] non-empty list of values
# @return [Array] `[value, index]` of the maximum
# @raise [Error] if `arr` is empty
def self.max(arr)
  raise Error, "Array must not be empty" if arr.length.zero?

  arr.each_with_index.max_by { |value, _position| value }
end
|
.mean_pooling(last_hidden_state, attention_mask) ⇒ Object
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
# File 'lib/informers/utils/tensor.rb', line 3
# Averages token vectors per sequence, weighting each token by its mask value.
#
# @param last_hidden_state [Array] indexed as [sequence][token][feature]
# @param attention_mask [Array] indexed as [sequence][token]; numeric weights
# @return [Array<Array<Float>>] one pooled feature vector per sequence
def self.mean_pooling(last_hidden_state, attention_mask)
  pooled = []
  last_hidden_state.zip(attention_mask).each do |token_vectors, token_mask|
    feature_count = token_vectors[0].size
    pooled << Array.new(feature_count) do |feature|
      weighted_total = 0.0
      mask_total = 0
      token_vectors.zip(token_mask).each do |vector, weight|
        mask_total += weight
        weighted_total += vector[feature] * weight
      end
      weighted_total / mask_total
    end
  end
  pooled
end
|
.normalize(result) ⇒ Object
19
20
21
22
23
24
|
# File 'lib/informers/utils/tensor.rb', line 19
# Scales every row to unit Euclidean length.
#
# @param result [Array<Array<Numeric>>] rows to normalize
# @return [Array<Array<Float>>] rows divided by their L2 norm
def self.normalize(result)
  result.each_with_object([]) do |vector, unit_rows|
    squared_total = vector.sum { |component| component * component }
    length = Math.sqrt(squared_total)
    unit_rows << vector.map { |component| component / length }
  end
end
|
.ones_like(tensor) ⇒ Object
30
31
32
33
34
35
|
# File 'lib/informers/utils/tensor.rb', line 30
# Builds a nested array of the same shape as `tensor`, filled with 1.
#
# @param tensor [Array] flat or nested array
# @return [Array] same nesting as `tensor`, with every leaf replaced by 1
def self.ones_like(tensor)
  # Nesting is decided by the first element only.
  nested = tensor[0].is_a?(Array)
  tensor.map { |element| nested ? ones_like(element) : 1 }
end
|
.post_process_object_detection(outputs, threshold = 0.5, target_sizes = nil, is_zero_shot = false) ⇒ Object
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
|
# File 'lib/informers/processors.rb', line 668
# Turns raw object-detection model outputs into per-image detections.
#
# In zero-shot mode every class whose sigmoid score exceeds `threshold`
# produces a detection. Otherwise only the highest-scoring class of each box
# is considered: the box is dropped when that class is the last one
# (presumably the "no object" class) or when its softmax score does not
# exceed `threshold`.
#
# @param outputs [Hash] `:logits` indexed as [image][box][class] and
#   `:pred_boxes` indexed as [image][box], each box being
#   `[center_x, center_y, width, height]`
# @param threshold [Float, nil] minimum score a detection must exceed; nil
#   disables the check in the non-zero-shot mode
# @param target_sizes [Array<Array<Numeric>>, nil] one entry per image; x
#   coordinates are multiplied by element 1 and y coordinates by element 0
# @param is_zero_shot [Boolean] selects sigmoid (true) or softmax (false) scoring
# @return [Array<Hash>] one `{boxes:, classes:, scores:}` hash per image,
#   boxes in `[x_min, y_min, x_max, y_max]` form
# @raise [Error] if `target_sizes` does not have one entry per image
def self.post_process_object_detection(outputs, threshold = 0.5, target_sizes = nil, is_zero_shot = false)
  out_logits = outputs[:logits]
  out_bbox = outputs[:pred_boxes]
  batch_size = out_logits.size
  num_boxes = out_logits[0].size
  num_classes = out_logits[0][0].size

  if !target_sizes.nil? && target_sizes.length != batch_size
    raise Error, "Make sure that you pass in as many target sizes as the batch dimension of the logits"
  end

  Array.new(batch_size) do |batch_index|
    target_size = target_sizes.nil? ? nil : target_sizes[batch_index]
    info = {boxes: [], classes: [], scores: []}
    logits = out_logits[batch_index]
    bbox = out_bbox[batch_index]

    num_boxes.times do |j|
      logit = logits[j]

      if is_zero_shot
        probs = Utils.sigmoid(logit)
        indices = probs.each_index.select { |k| probs[k] > threshold }
      else
        max_index = Utils.max(logit)[1]
        # Skip boxes whose best class is the last class.
        next if max_index == num_classes - 1

        probs = Utils.softmax(logit)
        # Honor the confidence threshold here as well, so low-scoring
        # detections are not returned.
        next unless threshold.nil? || probs[max_index] > threshold

        indices = [max_index]
      end

      indices.each do |index|
        box = center_to_corners_format(bbox[j])
        unless target_size.nil?
          # Even positions are x values (scaled by element 1), odd
          # positions are y values (scaled by element 0).
          box = box.map.with_index { |coord, axis| coord * target_size[(axis + 1) % 2] }
        end
        info[:boxes] << box
        info[:classes] << index
        info[:scores] << probs[index]
      end
    end

    info
  end
end
|
.read_audio(input, sampling_rate) ⇒ Object
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
# File 'lib/informers/utils/audio.rb', line 3
# Loads audio from a URI or a file path and decodes it with `ffmpeg_read`.
#
# @param input [URI, String] a URI to read, or a path to a local file
# @param sampling_rate [Integer] forwarded to `ffmpeg_read`
# @return [Object] whatever `ffmpeg_read` returns for the loaded bytes
# @raise [ArgumentError] if `input` is neither a URI nor a String
def self.read_audio(input, sampling_rate)
  audio_bytes =
    case input
    when URI
      # Loaded lazily: only needed when reading from a URI.
      require "open-uri"
      input.read
    when String
      File.binread(input)
    else
      raise ArgumentError, "Unsupported input type: #{input.class.name}"
    end

  ffmpeg_read(audio_bytes, sampling_rate)
end
|
.reshape(arr, dims) ⇒ Object
64
65
66
67
68
69
70
|
# File 'lib/informers/utils/tensor.rb', line 64
# Rebuilds a nested array with the requested shape from flattened data.
#
# The leading dimension is not enforced: elements are grouped by the
# trailing dimensions, innermost first.
#
# @param arr [Array] flat or nested source data
# @param dims [Array<Integer>] target shape, outermost dimension first
# @return [Array] nested array grouped according to `dims[1..]`
def self.reshape(arr, dims)
  flat = arr.flatten
  trailing_sizes = dims[1..-1]
  trailing_sizes.reverse.inject(flat) do |nested, width|
    nested.each_slice(width).to_a
  end
end
|
.sigmoid(arr) ⇒ Object
89
90
91
92
93
94
|
# File 'lib/informers/utils/math.rb', line 89
# Applies the logistic function element-wise to a flat or nested array.
#
# @param arr [Array] numbers, or arrays of numbers (any nesting depth)
# @return [Array] same nesting as `arr`, with each number mapped to 1 / (1 + e^-x)
def self.sigmoid(arr)
  # Nesting is decided by the first element only.
  nested = arr[0].is_a?(Array)
  arr.map do |item|
    nested ? sigmoid(item) : 1 / (1 + Math.exp(-item))
  end
end
|
.softmax(arr) ⇒ Object
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
# File 'lib/informers/utils/math.rb', line 73
# Computes the softmax of a list of numbers.
#
# The maximum is subtracted before exponentiating so large inputs do not
# overflow.
#
# @param arr [Array<Numeric>] input scores
# @return [Array<Float>] probabilities in the same order as `arr`
def self.softmax(arr)
  peak = arr.max
  exponentials = arr.map { |score| Math.exp(score - peak) }
  total = exponentials.sum
  exponentials.map { |weight| weight / total }
end
|
.stack(tensors, dim = 0) ⇒ Object
26
27
28
|
# File 'lib/informers/utils/tensor.rb', line 26
# Stacks equally shaped nested arrays along a new axis.
#
# With `dim = 0` the list of tensors already is the stacked result and is
# returned unchanged. For other axes the new dimension is inserted at
# position `dim`, so `result[i][j] == tensors[j][i]` for `dim = 1`.
#
# @param tensors [Array<Array>] tensors to stack; shapes are assumed equal
#   and are taken from the first tensor
# @param dim [Integer] position of the new axis; negative values count from
#   the end (`-1` inserts the axis after the last existing one)
# @return [Array] the stacked nested array
# @raise [ArgumentError] if `dim` lies outside the tensors' dimensions
def self.stack(tensors, dim = 0)
  return tensors if dim == 0 || tensors.empty?

  if dim < 0
    # Resolve a negative axis against the rank of the first tensor.
    rank = 0
    probe = tensors[0]
    while probe.is_a?(Array)
      rank += 1
      probe = probe[0]
    end
    dim += rank + 1
    raise ArgumentError, "dim is out of range for the given tensors" if dim < 0
    return tensors if dim == 0
  end

  unless tensors[0].is_a?(Array)
    raise ArgumentError, "dim is out of range for the given tensors"
  end

  # Peel off one leading axis and stack the matching slices one level deeper.
  tensors[0].each_index.map do |i|
    stack(tensors.map { |tensor| tensor[i] }, dim - 1)
  end
end
|