Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- lib/rbbt/bow/bow.rb,
lib/rbbt/bow/misc.rb
Constant Summary collapse
- CONSONANTS =
[]
Instance Method Summary collapse
-
#arabic ⇒ Object
Converts a Roman numeral to its Arabic (integer) form, if possible.
-
#bigrams ⇒ Object
Shortcut for BagOfWords.bigrams(self).
-
#downcase_first ⇒ Object
Turns the first letter to lowercase.
-
#is_special? ⇒ Boolean
Uses heuristics to check whether a string looks like a special word, such as a gene name.
-
#words ⇒ Object
Shortcut for BagOfWords.words(self).
Instance Method Details
#arabic ⇒ Object
Converts a Roman numeral to its Arabic (integer) form, if possible. Only simple Roman numerals are supported…
80 81 82 83 84 85 86 87 88 89 |
# File 'lib/rbbt/bow/misc.rb', line 80
# Converts a simple Roman numeral to its integer value.
#
# Only the numerals I, II, III, IV, V and X are recognised. The whole
# string must be exactly one of them: the comparison is against the full
# string rather than a line-anchored pattern, so multi-line text that
# merely contains a numeral on one of its lines is not converted.
#
# @return [Integer, nil] the numeral's value, or nil when the string is
#   not one of the recognised numerals
def arabic
  case self
  when 'I'   then 1
  when 'II'  then 2
  when 'III' then 3
  when 'IV'  then 4
  when 'V'   then 5
  when 'X'   then 10
  end
end
#bigrams ⇒ Object
Shortcut for BagOfWords.bigrams(self)
86 87 88 |
# File 'lib/rbbt/bow/bow.rb', line 86 def bigrams BagOfWords.bigrams(self) end |
#downcase_first ⇒ Object
Turns the first letter to lowercase
71 72 73 74 75 76 |
# File 'lib/rbbt/bow/misc.rb', line 71
# Returns a copy of the string with its first character lowercased.
#
# The rest of the string is kept exactly as it is, including any newline
# characters. The receiver itself is not modified.
#
# @return [String] a new string; "" when the receiver is empty
def downcase_first
  return "" if empty?

  # Slice instead of scanning with /./: "." does not match "\n", so a
  # scan-based split would silently drop newlines from the result.
  self[0, 1].downcase + self[1..-1]
end
#is_special? ⇒ Boolean
Uses heuristics to check whether a string looks like a special word, such as a gene name.
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/rbbt/bow/misc.rb', line 45
# Uses heuristics to check whether the string looks like a special word,
# such as a gene name.
#
# The rules below are tried in order and the first one that applies
# decides the result. Whole-string rules are anchored with \A and \z
# (not ^ and $) so that a single line of a multi-line string cannot
# satisfy them on its own.
#
# @return [Boolean] true when one of the "special word" rules matches,
#   false otherwise
def is_special?
  # Only consonants ("y" is not counted as a consonant here).
  return true if self =~ /\A[bcdfghjklmnpqrstvwxz]+\z/i

  # Not a word: whitespace between two non-whitespace characters,
  # or fewer than three characters.
  return false if self =~ /[^\s]\s[^\s]/
  return false if length < 3

  # Alphanumeric: at least one digit and at least one letter.
  return true if self =~ /[0-9]/ && self =~ /[a-z]/i

  # All caps: a run of two or more uppercase letters.
  return true if self =~ /[A-Z]{2,}/

  # Caps mix: a lowercase letter directly followed by an uppercase one.
  return true if self =~ /[a-z][A-Z]/

  # NOTE(review): an "all consonants" rule used to sit here, written as
  # /^[a-z]$/i plus "no vowels". That pattern only matches one-letter
  # strings, which the length guard above already rejects, so it could
  # never fire for a single-line string and has been dropped. If the
  # intent was /[a-z]+/, restoring it would also flag words such as
  # "try" -- confirm before reintroducing.

  # Dashed word: one word character before a leading dash or after a
  # trailing dash.
  return true if self =~ /(\A\w-|-\w\z)/

  # Too many consonants (very heuristic): the first run of three or more
  # characters that are not a, e, i, o, u or y, unless that run is listed
  # in CONSONANTS.
  cluster = self[/[^aeiouy]{3,}/i]
  return true if cluster && !CONSONANTS.include?(cluster.downcase)

  false
end
#words ⇒ Object
Shortcut for BagOfWords.words(self)
81 82 83 |
# File 'lib/rbbt/bow/bow.rb', line 81 def words BagOfWords.words(self) end |