Module: Spellchecker::DetectTypo
- Defined in:
- lib/spellchecker/detect_typo.rb
Constant Summary collapse
- PROPER_NAME_REGEXP =
/\A(?:[a-z]+[A-Z])|(?:[A-Z]+.+[A-Z]+)|(?:[A-Z]{2,}[^A-Z]+)/.freeze
- ABBREVIATION_REGEXP =
/\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze
- LENGTH_LIMIT =
2
- ABBREVIATION_LENGTH =
2
- NUMBER_SHORTENING_SUFFIX =
'th'
- SHORTENINGS =
Set.new(%w[ver]).freeze
Class Method Summary collapse
- .abbreviation?(token) ⇒ Boolean
- .call(token) ⇒ Spellchecker::Mistake?
- .proper_noun?(word) ⇒ Boolean
- .shortening?(token) ⇒ Boolean
Class Method Details
.abbreviation?(token) ⇒ Boolean
47 48 49 50 51 52 53 |
# File 'lib/spellchecker/detect_typo.rb', line 47

# Tells whether a token should be treated as an abbreviation.
#
# A token qualifies when its text matches ABBREVIATION_REGEXP, or when the
# text is at most ABBREVIATION_LENGTH characters long and neither neighbouring
# token answers true to +word?+.
#
# NOTE(review): in ABBREVIATION_REGEXP the top-level `|` splits the pattern
# into `\A(?:[A-Z]{2,4})` and `(?:[A-Z][a-z])\z`, so each anchor applies to
# only one alternative -- confirm that this is intended.
#
# @param token [#text, #prev, #next] token under inspection; +prev+ and
#   +next+ must respond to +word?+
# @return [Boolean]
def abbreviation?(token)
  text = token.text
  return true if ABBREVIATION_REGEXP.match?(text)

  # Very short token with no word-like token on either side.
  text.length <= ABBREVIATION_LENGTH && !token.prev.word? && !token.next.word?
end
.call(token) ⇒ Spellchecker::Mistake?
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/spellchecker/detect_typo.rb', line 17

# Checks a single token against the typo list and reports a spelling mistake
# unless one of the exclusion rules applies.
#
# Nothing is reported (nil is returned) when:
# * the text is shorter than LENGTH_LIMIT;
# * Dictionaries::TyposList has no correction for the token;
# * the text matches PROPER_NAME_REGEXP;
# * the token is an abbreviation or a shortening;
# * the quote-normalised text is listed in Dictionaries::EnglishWords;
# * the token is capitalised and the text is a known proper noun.
#
# NOTE(review): in PROPER_NAME_REGEXP the leading `\A` belongs to the first
# alternative only; the other two alternatives can match anywhere in the
# text -- confirm that this is intended.
#
# @param token [#text, #position, #capital?, #prev, #next] token to inspect
# @return [Spellchecker::Mistake, nil] the detected mistake, or nil
def call(token)
  word = token.text
  return if word.length < LENGTH_LIMIT

  correction = Dictionaries::TyposList.match_token(token)
  return unless correction
  return if PROPER_NAME_REGEXP.match?(word)
  return if abbreviation?(token) || shortening?(token)
  return if Dictionaries::EnglishWords.include?(Utils.replace_quote(word))
  return if token.capital? && proper_noun?(word)

  # Carry the capitalisation of the original token over to the suggestion by
  # upcasing its first non-whitespace character.
  correction = correction.sub(/\S/, &:upcase) if token.capital?

  Mistake.new(text: word,
              correction: correction,
              position: token.position,
              type: MistakeTypes::SPELLING)
end
.proper_noun?(word) ⇒ Boolean
39 40 41 42 43 |
# File 'lib/spellchecker/detect_typo.rb', line 39

# Tells whether a word is listed in any of the proper-noun dictionaries.
#
# The dictionaries are consulted in order -- HumanNames, CompanyNames,
# UsToponyms -- and the lookup stops at the first one that includes the word.
#
# @param word [String] text to look up, passed to each dictionary unchanged
# @return [Boolean]
def proper_noun?(word)
  Dictionaries::HumanNames.include?(word) ||
    Dictionaries::CompanyNames.include?(word) ||
    Dictionaries::UsToponyms.include?(word)
end
.shortening?(token) ⇒ Boolean
57 58 59 60 61 62 63 |
# File 'lib/spellchecker/detect_typo.rb', line 57

# Tells whether a token is a recognised shortening rather than a typo.
#
# Two cases are accepted:
# * the text equals NUMBER_SHORTENING_SUFFIX and the previous token answers
#   true to +digit?+ (presumably the suffix of an ordinal such as "5th");
# * the downcased token is listed in SHORTENINGS and the next token answers
#   true to +dot?+ or +digit?+.
#
# @param token [#text, #downcased, #prev, #next] token under inspection
# @return [Boolean]
def shortening?(token)
  return true if token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?

  return true if SHORTENINGS.include?(token.downcased) &&
                 (token.next.dot? || token.next.digit?)

  false
end