Module: Spellchecker::DetectTypo

Defined in: lib/spellchecker/detect_typo.rb

Constant Summary collapse

PROPER_NAME_REGEXP =

/\A(?:[a-z]+[A-Z])|(?:[A-Z]+.+[A-Z]+)|(?:[A-Z]{2,}[^A-Z]+)/.freeze

ABBREVIATION_REGEXP =

/\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze

LENGTH_LIMIT =

ABBREVIATION_LENGTH =

NUMBER_SHORTENING_SUFFIX =

'th'

SHORTENINGS =

Set.new(%w[ver]).freeze

Class Method Summary collapse

Class Method Details

.abbreviation?(token) ⇒ `Boolean`

Parameters:

token (Spellchecker::Tokenizer::Token)

Returns:

(Boolean)

# File 'lib/spellchecker/detect_typo.rb', line 47

# Decides whether a token should be treated as an abbreviation.
#
# A token qualifies when its text matches ABBREVIATION_REGEXP, or when the
# text is no longer than ABBREVIATION_LENGTH characters and neither the
# previous nor the next token reports itself as a word.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def abbreviation?(token)
  if ABBREVIATION_REGEXP.match?(token.text)
    true
  elsif token.text.length > ABBREVIATION_LENGTH
    false
  else
    # Short text only counts when it has no word on either side.
    !(token.prev.word? || token.next.word?)
  end
end

.call(token) ⇒ `Spellchecker::Mistake`, `nil`

Parameters:

token (Spellchecker::Tokenizer::Token)

Returns:

(Spellchecker::Mistake, nil)

# File 'lib/spellchecker/detect_typo.rb', line 17

# Checks a single token against the typo list and builds a spelling mistake
# for it when a correction exists and none of the skip rules apply.
#
# Nothing is reported (nil is returned) when:
# * the text is shorter than LENGTH_LIMIT;
# * Dictionaries::TyposList has no correction for the token;
# * the text matches PROPER_NAME_REGEXP, or the token is an abbreviation or
#   a shortening, or the quote-normalised text is in
#   Dictionaries::EnglishWords;
# * the token is capitalised and the text is a known proper noun.
#
# For a capitalised token the first non-whitespace character of the
# correction is upcased so that it mirrors the original casing.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Spellchecker::Mistake, nil]
def call(token)
  text = token.text
  return unless text.length >= LENGTH_LIMIT

  suggestion = Dictionaries::TyposList.match_token(token)
  return unless suggestion

  # Evaluated left to right with short-circuiting, cheapest rules first.
  skip = PROPER_NAME_REGEXP.match?(text) ||
         abbreviation?(token) ||
         shortening?(token) ||
         Dictionaries::EnglishWords.include?(Utils.replace_quote(text)) ||
         (token.capital? && proper_noun?(text))
  return if skip

  fixed = token.capital? ? suggestion.sub(/\S/) { |first| first.upcase } : suggestion

  Mistake.new(
    text: text,
    correction: fixed,
    position: token.position,
    type: MistakeTypes::SPELLING
  )
end

.proper_noun?(word) ⇒ `Boolean`

Parameters:

word (String)

Returns:

(Boolean)

# File 'lib/spellchecker/detect_typo.rb', line 39

# Tells whether the word is listed in any of the proper-noun dictionaries.
#
# The dictionaries are consulted in order (human names, company names,
# US toponyms) and the lookup stops at the first one that includes the word.
#
# @param word [String]
# @return [Boolean]
def proper_noun?(word)
  known = Dictionaries::HumanNames.include?(word)
  known ||= Dictionaries::CompanyNames.include?(word)
  known ||= Dictionaries::UsToponyms.include?(word)
  known
end

.shortening?(token) ⇒ `Boolean`

Parameters:

token (Spellchecker::Tokenizer::Token)

Returns:

(Boolean)

# File 'lib/spellchecker/detect_typo.rb', line 57

# Tells whether the token is a shortening that should not be reported.
#
# Two shapes are recognised:
# * the text equals NUMBER_SHORTENING_SUFFIX and the previous token is a
#   digit;
# * the downcased token is listed in SHORTENINGS and the next token is a dot
#   or a digit.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def shortening?(token)
  number_suffix = token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
  return true if number_suffix
  return false unless SHORTENINGS.include?(token.downcased)

  (token.next.dot? || token.next.digit?) ? true : false
end

Module: Spellchecker::DetectTypo

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.abbreviation?(token) ⇒ Boolean

.call(token) ⇒ Spellchecker::Mistake?

.proper_noun?(word) ⇒ Boolean

.shortening?(token) ⇒ Boolean

.abbreviation?(token) ⇒ `Boolean`

.call(token) ⇒ `Spellchecker::Mistake`, `nil`

.proper_noun?(word) ⇒ `Boolean`

.shortening?(token) ⇒ `Boolean`