Module: Spellchecker::DetectNgram
- Defined in:
- lib/spellchecker/detect_ngram.rb
Constant Summary collapse
- NGRAM_RANGE = (1..4).freeze
- SEPARATOR_REGEXP = /[;,\n.]/.freeze
Class Method Summary collapse
- .call(token) ⇒ Spellchecker::Mistake?
- .fetch_original_text(token, index) ⇒ String
- .find_ngram(token) ⇒ Array<(String, String)>?
Class Method Details
.call(token) ⇒ Spellchecker::Mistake?
12 13 14 15 16 17 18 19 20 21 |
# File 'lib/spellchecker/detect_ngram.rb', line 12

# Builds a grammar mistake for the n-gram that starts at +token+, if one is found.
#
# Delegates the lookup to +find_ngram+. When the matched original text begins
# with an ASCII uppercase letter, the first non-whitespace character of the
# correction is upcased so the suggestion keeps the original capitalization.
#
# @param token [#position] first token of the candidate n-gram (also passed to +find_ngram+)
# @return [Spellchecker::Mistake, nil] a mistake of type +MistakeTypes::GRAMMAR+
#   positioned at +token.position+, or nil when +find_ngram+ yields no correction
def call(token)
  original, suggestion = find_ngram(token)
  return unless suggestion

  starts_capitalized = original.match?(/\A[A-Z]/)
  suggestion = suggestion.sub(/\S/, &:upcase) if starts_capitalized

  Mistake.new(
    text: original,
    correction: suggestion,
    position: token.position,
    type: MistakeTypes::GRAMMAR
  )
end
.fetch_original_text(token, index) ⇒ String
42 43 44 45 46 47 48 49 |
# File 'lib/spellchecker/detect_ngram.rb', line 42

# Rebuilds the text spanned by +token+ and the +index+ tokens before it.
#
# Walks backwards through +prev+ links, collecting each token's +text+, and
# joins the pieces in reading order with single spaces.
#
# @param token [#text, #prev] last token of the span
# @param index [Integer] how many preceding tokens to include in addition to +token+
# @return [String] the space-joined text of the +index + 1+ tokens ending at +token+
def fetch_original_text(token, index)
  words = []
  current = token

  (index + 1).times do
    previous = current.prev
    # Walking backwards, so each older word goes to the front.
    words.unshift(current.text)
    current = previous
  end

  words.join(' ')
end
.find_ngram(token) ⇒ Array<(String, String)>?
25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/spellchecker/detect_ngram.rb', line 25

# Searches for a known n-gram that starts at +token+.
#
# Beginning with the normalized form of +token+, the following tokens are
# appended one at a time (at most +NGRAM_RANGE.end+ of them). After each
# addition the accumulated list is passed to +Dictionaries::NgramList.match+;
# the first correction it returns ends the search.
#
# The search stops without a result when the next token's text matches
# +SEPARATOR_REGEXP+, when the token chain ends, or when the range is exhausted.
#
# @param token [#normalized, #next] first token of the candidate n-gram
# @return [Array<(String, String)>, nil] the original text of the matched
#   tokens and its correction, or nil when nothing matched
def find_ngram(token)
  normalized_words = [token.normalized]
  current = token

  NGRAM_RANGE.each do |offset|
    current = current.next
    # NOTE(review): guards against +next+ returning nil at the end of the
    # token chain; if the tokenizer uses a null-object token instead, the nil
    # check is simply never true — confirm against the tokenizer.
    return nil if current.nil? || current.text.match?(SEPARATOR_REGEXP)

    normalized_words << current.normalized
    correction = Dictionaries::NgramList.match(normalized_words)
    # +offset+ is the number of tokens walked past the first one, so the
    # original text covers +offset + 1+ tokens ending at +current+.
    return [fetch_original_text(current, offset), correction] if correction
  end

  nil
end