Module: Bm25::Utils

Defined in:
lib/bm25/utils.rb

Class Method Summary collapse

Class Method Details

.separate_document(document) ⇒ Object



19
20
21
22
# File 'lib/bm25/utils.rb', line 19

# Splits a document into sentence-like fragments.
#
# The text is cut at the ideographic full stop (。), the ASCII period, the
# ASCII comma, and CR / LF line breaks. Delimiters are discarded, and runs of
# consecutive delimiters never yield empty fragments.
#
# @param document [String] the text to split
# @return [Array<String>] fragments in order of appearance; empty when the
#   document is empty or consists solely of delimiters
def separate_document(document)
  # Inside a character class, `|` and any `^` after the first position are
  # literal characters, not alternation/negation. Listing them would make the
  # method split on "|" and "^" as well, so only the real delimiters appear.
  document.scan(/[^。.,\r\n]+/)
end

.separate_words(document) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
# File 'lib/bm25/utils.rb', line 7

# Tokenizes a document with MeCab and collects the surface form of each
# node that is kept.
#
# A node is skipped whenever Bm25::Validator.validate_word returns a truthy
# value for it.
# NOTE(review): despite its name, a truthy result from validate_word means
# "discard this node" here — confirm against the validator's definition.
#
# @param document [String] the text to tokenize
# @return [Array] surface values of the kept nodes, in parse order
def separate_words(document)
  surfaces = []
  tagger = Natto::MeCab.new
  tagger.parse(document) do |node|
    next if Bm25::Validator.validate_word(node)

    surfaces << node.surface
  end
  surfaces
end