Module: BlingFire
- Defined in:
  - lib/blingfire.rb
  - lib/blingfire/ffi.rb
  - lib/blingfire/model.rb
  - lib/blingfire/version.rb
Defined Under Namespace
Modules: FFI
Classes: Error, Model
Constant Summary collapse
- VERSION = "0.2.0"
Class Attribute Summary collapse
- .ffi_lib ⇒ Object
  Returns the value of attribute ffi_lib.
Class Method Summary collapse
- .change_settings_dummy_prefix(model, value) ⇒ Object
- .free_model(model) ⇒ Object
- .ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil) ⇒ Object
- .lib_version ⇒ Object
- .load_model(path, **options) ⇒ Object
- .normalize_spaces(text) ⇒ Object
- .text_to_ids(model, text, max_len = nil, unk_id = 0) ⇒ Object
- .text_to_ids_with_offsets(model, text, max_len = nil, unk_id = 0) ⇒ Object
- .text_to_sentences(text) ⇒ Object
- .text_to_sentences_with_model(model, text) ⇒ Object
- .text_to_sentences_with_offsets(text) ⇒ Object
- .text_to_sentences_with_offsets_with_model(model, text) ⇒ Object
- .text_to_words(text) ⇒ Object
- .text_to_words_with_model(model, text) ⇒ Object
- .text_to_words_with_offsets(text) ⇒ Object
- .text_to_words_with_offsets_with_model(model, text) ⇒ Object
Class Attribute Details
.ffi_lib ⇒ Object
Returns the value of attribute ffi_lib.
12 13 14 |
# File 'lib/blingfire.rb', line 12
# Reader for the ffi_lib attribute.
#
# @return the current value of @ffi_lib
def ffi_lib = @ffi_lib
Class Method Details
.change_settings_dummy_prefix(model, value) ⇒ Object
138 139 140 141 142 |
# File 'lib/blingfire.rb', line 138
# Toggles the dummy-prefix setting on a model through FFI.SetNoDummyPrefix.
#
# The native setter takes the *opposite* of +value+: a truthy +value+ is
# sent as 0 and a falsy one as 1.
#
# @param model model handle forwarded to the native call
# @param value truthy to send 0, falsy to send 1
# @return [nil]
# @raise [Error] when the native call returns anything other than 1
def change_settings_dummy_prefix(model, value)
  no_dummy_prefix = value ? 0 : 1
  status = FFI.SetNoDummyPrefix(model, no_dummy_prefix)
  return if status == 1

  raise Error, "Bad status: #{status}"
end
.free_model(model) ⇒ Object
125 126 127 |
# File 'lib/blingfire.rb', line 125
# Hands the model to FFI.FreeModel and returns whatever that call returns.
#
# @param model model handle forwarded to the native call
def free_model(model) = FFI.FreeModel(model)
.ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil) ⇒ Object
116 117 118 119 120 121 122 123 |
# File 'lib/blingfire.rb', line 116
# Converts token ids back into text via FFI.IdsToText.
#
# @param model model handle forwarded to the native call
# @param ids [Array<Integer>] token ids; packed as native ints before the call
# @param skip_special_tokens [Boolean] sent to the native call as 1 (truthy) or 0
# @param output_buffer_size [Integer, nil] output buffer size in bytes;
#   defaults to 32 bytes per id when nil
# @return the result of encode_utf8 applied to the decoded bytes
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
  output_buffer_size ||= ids.size * 32
  c_ids = Fiddle::Pointer[ids.pack("i*")]
  # Attach a free function: Fiddle::Pointer.malloc without one never releases
  # the allocation, so every call would leak the output buffer.
  out = Fiddle::Pointer.malloc(output_buffer_size, Fiddle::RUBY_FREE)
  out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
  check_status out_size, out
  # NOTE(review): the final byte is dropped — presumably a trailing NUL
  # counted by the native call; confirm against the BlingFire API.
  encode_utf8(out.to_str(out_size - 1))
end
.lib_version ⇒ Object
37 38 39 |
# File 'lib/blingfire.rb', line 37
# Returns the value reported by FFI.GetBlingFireTokVersion.
def lib_version = FFI.GetBlingFireTokVersion
.load_model(path, **options) ⇒ Object
41 42 43 |
# File 'lib/blingfire.rb', line 41
# Builds a Model for the given path, forwarding all keyword options unchanged.
#
# @param path forwarded as the first argument to Model.new
# @return [Model]
def load_model(path, **) = Model.new(path, **)
.normalize_spaces(text) ⇒ Object
129 130 131 132 133 134 135 136 |
# File 'lib/blingfire.rb', line 129
# Normalizes spaces in +text+ via FFI.NormalizeSpaces, using U+0020 as the
# space character passed to the native call.
#
# @param text [String] input; passed through encode_utf8 (on a copy) when
#   its encoding is not UTF-8
# @return the result of encode_utf8 applied to the normalized bytes
def normalize_spaces(text)
  u_space = 0x20
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
  # 1.5x the input size with a 20-byte floor, computed with integers so the
  # allocation size is never a Float (same value the Float form truncated to).
  capacity = [text.bytesize * 3 / 2, 20].max
  # Attach a free function: Fiddle::Pointer.malloc without one never releases
  # the allocation, so every call would leak the output buffer.
  out = Fiddle::Pointer.malloc(capacity, Fiddle::RUBY_FREE)
  out_size = FFI.NormalizeSpaces(text, text.bytesize, out, out.size, u_space)
  check_status out_size, out
  encode_utf8(out.to_str(out_size))
end
.text_to_ids(model, text, max_len = nil, unk_id = 0) ⇒ Object
93 94 95 96 97 98 99 |
# File 'lib/blingfire.rb', line 93
# Tokenizes +text+ into ids via FFI.TextToIds.
#
# @param model model handle forwarded to the native call
# @param text [String] input; passed through encode_utf8 (on a copy) when
#   its encoding is not UTF-8
# @param max_len [Integer, nil] number of int slots to allocate and to
#   return; when nil, one slot per character is allocated and the count
#   reported by the native call is returned
# @param unk_id [Integer] forwarded to the native call
# @return [Array<Integer>] ids unpacked from the native buffer
def text_to_ids(model, text, max_len = nil, unk_id = 0)
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
  # Attach a free function: Fiddle::Pointer.malloc without one never releases
  # the allocation, so every call would leak the id buffer.
  ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
  # NOTE(review): ids.size is the allocation size in bytes, not the number of
  # int slots — confirm which one the native call expects as its buffer length.
  out_size = FFI.TextToIds(model, text, text.bytesize, ids, ids.size, unk_id)
  check_status out_size, ids
  ids[0, (max_len || out_size) * Fiddle::SIZEOF_INT].unpack("i!*")
end
.text_to_ids_with_offsets(model, text, max_len = nil, unk_id = 0) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/blingfire.rb', line 101
# Tokenizes +text+ into ids plus start/end offsets via
# FFI.TextToIdsWithOffsets.
#
# @param model model handle forwarded to the native call
# @param text [String] input; passed through encode_utf8 (on a copy) when
#   its encoding is not UTF-8
# @param max_len [Integer, nil] number of int slots to allocate for ids and
#   to return; when nil, one slot per character is allocated and the count
#   reported by the native call is returned
# @param unk_id [Integer] forwarded to the native call
# @return [Array] the id array followed by whatever unpack_offsets returns
def text_to_ids_with_offsets(model, text, max_len = nil, unk_id = 0)
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
  # Every buffer gets a free function: Fiddle::Pointer.malloc without one
  # never releases the allocation, so each call would leak all three buffers.
  ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
  # NOTE(review): ids.size is the id buffer's size in bytes, not its number of
  # int slots — confirm which one the native call expects as its buffer length.
  start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
  end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
  out_size = FFI.TextToIdsWithOffsets(model, text, text.bytesize, ids, start_offsets, end_offsets, ids.size, unk_id)
  check_status out_size, ids
  result = ids[0, (max_len || out_size) * Fiddle::SIZEOF_INT].unpack("i!*")
  [result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
end
.text_to_sentences(text) ⇒ Object
69 70 71 72 73 |
# File 'lib/blingfire.rb', line 69
# Runs FFI.TextToSentences through the text_to helper (defined elsewhere),
# passing "\n" as the helper's second argument.
#
# @param text input handed to text_to
# @return whatever text_to returns
def text_to_sentences(text)
  text_to(text, "\n") { |input, buffer| FFI.TextToSentences(input, input.bytesize, buffer, buffer.size) }
end
.text_to_sentences_with_model(model, text) ⇒ Object
75 76 77 78 79 |
# File 'lib/blingfire.rb', line 75
# Runs FFI.TextToSentencesWithModel through the text_to helper (defined
# elsewhere), passing "\n" as the helper's second argument.
#
# @param model model handle passed as the last native argument
# @param text input handed to text_to
# @return whatever text_to returns
def text_to_sentences_with_model(model, text)
  text_to(text, "\n") do |input, buffer|
    FFI.TextToSentencesWithModel(input, input.bytesize, buffer, buffer.size, model)
  end
end
.text_to_sentences_with_offsets(text) ⇒ Object
81 82 83 84 85 |
# File 'lib/blingfire.rb', line 81
# Runs FFI.TextToSentencesWithOffsets through the text_to_with_offsets helper
# (defined elsewhere), passing "\n" as the helper's second argument.
#
# @param text input handed to text_to_with_offsets
# @return whatever text_to_with_offsets returns
def text_to_sentences_with_offsets(text)
  text_to_with_offsets(text, "\n") do |input, buffer, starts, ends|
    FFI.TextToSentencesWithOffsets(input, input.bytesize, buffer, starts, ends, buffer.size)
  end
end
.text_to_sentences_with_offsets_with_model(model, text) ⇒ Object
87 88 89 90 91 |
# File 'lib/blingfire.rb', line 87
# Runs FFI.TextToSentencesWithOffsetsWithModel through the
# text_to_with_offsets helper (defined elsewhere), passing "\n" as the
# helper's second argument.
#
# @param model model handle passed as the last native argument
# @param text input handed to text_to_with_offsets
# @return whatever text_to_with_offsets returns
def text_to_sentences_with_offsets_with_model(model, text)
  text_to_with_offsets(text, "\n") do |input, buffer, starts, ends|
    FFI.TextToSentencesWithOffsetsWithModel(input, input.bytesize, buffer, starts, ends, buffer.size, model)
  end
end
.text_to_words(text) ⇒ Object
45 46 47 48 49 |
# File 'lib/blingfire.rb', line 45
# Runs FFI.TextToWords through the text_to helper (defined elsewhere),
# passing a single space as the helper's second argument.
#
# @param text input handed to text_to
# @return whatever text_to returns
def text_to_words(text)
  text_to(text, " ") { |input, buffer| FFI.TextToWords(input, input.bytesize, buffer, buffer.size) }
end
.text_to_words_with_model(model, text) ⇒ Object
51 52 53 54 55 |
# File 'lib/blingfire.rb', line 51
# Runs FFI.TextToWordsWithModel through the text_to helper (defined
# elsewhere), passing a single space as the helper's second argument.
#
# @param model model handle passed as the last native argument
# @param text input handed to text_to
# @return whatever text_to returns
def text_to_words_with_model(model, text)
  text_to(text, " ") do |input, buffer|
    FFI.TextToWordsWithModel(input, input.bytesize, buffer, buffer.size, model)
  end
end
.text_to_words_with_offsets(text) ⇒ Object
57 58 59 60 61 |
# File 'lib/blingfire.rb', line 57
# Runs FFI.TextToWordsWithOffsets through the text_to_with_offsets helper
# (defined elsewhere), passing a single space as the helper's second argument.
#
# @param text input handed to text_to_with_offsets
# @return whatever text_to_with_offsets returns
def text_to_words_with_offsets(text)
  text_to_with_offsets(text, " ") do |input, buffer, starts, ends|
    FFI.TextToWordsWithOffsets(input, input.bytesize, buffer, starts, ends, buffer.size)
  end
end
.text_to_words_with_offsets_with_model(model, text) ⇒ Object
63 64 65 66 67 |
# File 'lib/blingfire.rb', line 63
# Runs FFI.TextToWordsWithOffsetsWithModel through the text_to_with_offsets
# helper (defined elsewhere), passing a single space as the helper's second
# argument.
#
# @param model model handle passed as the last native argument
# @param text input handed to text_to_with_offsets
# @return whatever text_to_with_offsets returns
def text_to_words_with_offsets_with_model(model, text)
  text_to_with_offsets(text, " ") do |input, buffer, starts, ends|
    FFI.TextToWordsWithOffsetsWithModel(input, input.bytesize, buffer, starts, ends, buffer.size, model)
  end
end