Module: Bm25::Utils
- Defined in:
- lib/bm25/utils.rb
Class Method Summary
Class Method Details
.separate_document(document) ⇒ Object
19 20 21 22 |
# File 'lib/bm25/utils.rb', line 19
# Splits a document into fragments at sentence and line delimiters.
#
# Delimiters are the ideographic full stop (。), the ASCII period, the
# ASCII comma, carriage return and line feed. Runs of consecutive
# delimiters never produce empty fragments, because only non-empty runs of
# non-delimiter characters are collected.
#
# @param document [String] the text to split
# @return [Array<String>] the fragments in order of appearance; empty when
#   the document contains no non-delimiter characters
def separate_document(document)
  # Inside a bracket expression only the leading `^` negates; any further
  # `^` or `|` is a literal character. The delimiters are therefore listed
  # once each, so carets and pipes in the text are no longer treated as
  # separators by accident.
  document.scan(/[^。.,\r\n]+/)
end
.separate_words(document) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/bm25/utils.rb', line 7
# Collects the surface of every node that Natto::MeCab yields while parsing
# the document, skipping nodes for which Bm25::Validator.validate_word
# returns a truthy value.
#
# @param document the text handed to Natto::MeCab#parse (presumably a
#   String; confirm against callers)
# @return [Array] the surfaces of the retained nodes, in the order the
#   parser yielded them
def separate_words(document)
  surfaces = []
  Natto::MeCab.new.parse(document) do |node|
    # A truthy validator result marks the node as one to leave out.
    surfaces << node.surface unless Bm25::Validator.validate_word(node)
  end
  surfaces
end