Module: Spellchecker::DetectTypo

Defined in:
lib/spellchecker/detect_typo.rb

Constant Summary

# Words matching this pattern are skipped by .call. It has three alternatives:
#   1. at the start of the string, lowercase letter(s) followed by an
#      uppercase letter;
#   2. uppercase letter(s), at least one arbitrary character, then uppercase
#      letter(s);
#   3. two or more uppercase letters followed by non-uppercase character(s).
# NOTE(review): \A binds only to the first alternative, so alternatives 2 and 3
# may match anywhere in the string - confirm this is intended.
PROPER_NAME_REGEXP =
/\A(?:[a-z]+[A-Z])|(?:[A-Z]+.+[A-Z]+)|(?:[A-Z]{2,}[^A-Z]+)/.freeze
# Matches a token that is, in its entirety, either two to four uppercase
# letters (e.g. "USA") or one uppercase letter followed by one lowercase
# letter (e.g. "Dr").
# The alternatives share a single group so that \A and \z apply to both of
# them; without the group, \A anchored only the first alternative and \z only
# the second, so any text starting with two uppercase letters or ending in an
# uppercase-lowercase pair was treated as an abbreviation.
ABBREVIATION_REGEXP =
/\A(?:[A-Z]{2,4}|[A-Z][a-z])\z/.freeze
# Minimum word length examined by .call; shorter words are skipped.
LENGTH_LIMIT =
2
# Tokens whose text is at most this long are treated as abbreviations by
# .abbreviation? when neither neighbouring token is a word.
ABBREVIATION_LENGTH =
2
# Token text that .shortening? accepts when the previous token is a digit
# (presumably the suffix of ordinals such as "5th").
NUMBER_SHORTENING_SUFFIX =
'th'
# Downcased token texts that .shortening? accepts when the next token is a dot
# or a digit.
SHORTENINGS =
Set.new(%w[ver]).freeze

Class Method Summary

Class Method Details

.abbreviation?(token) ⇒ Boolean

Parameters:

  • token

Returns:

  • (Boolean)


47
48
49
50
51
52
53
# File 'lib/spellchecker/detect_typo.rb', line 47

# Tells whether the token looks like an abbreviation.
#
# A token qualifies when its text matches ABBREVIATION_REGEXP, or when the
# text is no longer than ABBREVIATION_LENGTH and neither the previous nor the
# next token is a word.
#
# @param token [#text, #prev, #next] token under inspection; its neighbours
#   must respond to +word?+
# @return [Boolean]
def abbreviation?(token)
  text = token.text

  if ABBREVIATION_REGEXP.match?(text)
    true
  elsif text.length > ABBREVIATION_LENGTH
    false
  else
    # A short token only counts when it stands apart from surrounding words.
    !(token.prev.word? || token.next.word?)
  end
end

.call(token) ⇒ Spellchecker::Mistake?

Parameters:

  • token

Returns:

  • (Spellchecker::Mistake, nil)



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/spellchecker/detect_typo.rb', line 17

# Builds a spelling Mistake for the token when the typo list offers a
# correction and none of the exemptions apply.
#
# Returns nil when the word is shorter than LENGTH_LIMIT, when the typo list
# has no correction, or when the word matches PROPER_NAME_REGEXP, is an
# abbreviation or shortening, is a known English word, or is a capitalised
# proper noun.
#
# @param token [#text, #position, #capital?] token to check
# @return [Mistake, nil]
def call(token)
  text = token.text
  return if text.length < LENGTH_LIMIT

  suggestion = Dictionaries::TyposList.match_token(token)
  return unless suggestion

  # Checks run in this order and stop at the first one that applies.
  exempt = PROPER_NAME_REGEXP.match?(text) ||
           abbreviation?(token) ||
           shortening?(token) ||
           Dictionaries::EnglishWords.include?(Utils.replace_quote(text)) ||
           (token.capital? && proper_noun?(text))
  return if exempt

  if token.capital?
    # Upcase the first non-whitespace character of the correction.
    suggestion = suggestion.sub(/\S/) { |first_char| first_char.upcase }
  end

  Mistake.new(
    text: text,
    correction: suggestion,
    position: token.position,
    type: MistakeTypes::SPELLING
  )
end

.proper_noun?(word) ⇒ Boolean

Parameters:

  • word (String)

Returns:

  • (Boolean)


39
40
41
42
43
# File 'lib/spellchecker/detect_typo.rb', line 39

# Tells whether the word is listed in any of the name dictionaries, consulted
# in order: human names, company names, US toponyms. Lookup stops at the first
# dictionary that contains the word.
#
# @param word [String]
# @return [Boolean]
def proper_noun?(word)
  return true if Dictionaries::HumanNames.include?(word)
  return true if Dictionaries::CompanyNames.include?(word)

  Dictionaries::UsToponyms.include?(word)
end

.shortening?(token) ⇒ Boolean

Parameters:

  • token

Returns:

  • (Boolean)


57
58
59
60
61
62
63
# File 'lib/spellchecker/detect_typo.rb', line 57

# Tells whether the token is a recognised shortening.
#
# Two cases qualify: the text equals NUMBER_SHORTENING_SUFFIX and the previous
# token is a digit, or the downcased token is listed in SHORTENINGS and the
# next token is a dot or a digit.
#
# @param token [#text, #downcased, #prev, #next] token under inspection
# @return [Boolean]
def shortening?(token)
  number_suffix = token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
  return true if number_suffix
  return false unless SHORTENINGS.include?(token.downcased)

  # A listed shortening only counts when a dot or a digit follows it.
  marked_by_follower = token.next.dot? || token.next.digit?
  return true if marked_by_follower

  false
end