Class: Yawc
- Inherits:
-
Object
- Object
- Yawc
- Defined in:
- lib/yawc.rb
Instance Attribute Summary collapse
-
#to_h ⇒ Object
readonly
Returns the value of attribute to_h.
Instance Method Summary collapse
-
#initialize(s, level: 2) ⇒ Yawc
constructor
level: 2 strips out ignore_words and stop_words; 3 also strips out dictionary words.
- #words(s) ⇒ Object
Constructor Details
#initialize(s, level: 2) ⇒ Yawc
level:
2 strips out ignore_words and stop_words
3 strips out dictionary words
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/yawc.rb', line 27
# Counts how often each word returned by #words occurs in +s+ and stores
# the result in @to_h, ordered from most to least frequent.
#
# @param s [String] text to analyse
# @param level [Integer] 2 keeps every word left over by #words (which has
#   already dropped the ignore words and stop words); 3 additionally
#   removes every entry of Words2DotDat.words
# @raise [ArgumentError] if +level+ is neither 2 nor 3
def initialize(s, level: 2)
  # Fail early with a clear message; previously an unsupported level fell
  # through the case statement and crashed later with NoMethodError on nil.
  unless [2, 3].include?(level)
    raise ArgumentError, "level must be 2 or 3 (got #{level.inspect})"
  end

  @stopwords = Words2DotDat.stopwords

  tokens = words(s).split
  tokens -= Words2DotDat.words if level == 3

  # Single counting pass instead of merging a fresh hash per distinct word.
  counts = tokens.each_with_object(Hash.new(0)) do |word, tally|
    tally[word] += 1
  end

  # Keys are copied and tagged as UTF-8, as before.
  pairs = counts.map do |word, total|
    [String.new(word).force_encoding("utf-8"), total]
  end

  @to_h = pairs.sort_by(&:last).reverse.to_h
end
Instance Attribute Details
#to_h ⇒ Object (readonly)
Returns the value of attribute to_h.
21 22 23 |
# File 'lib/yawc.rb', line 21
# Reader for @to_h, the word => count hash that the constructor assigns.
#
# @return [Hash] the value currently held in @to_h
def to_h
  @to_h
end
Instance Method Details
#words(s) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/yawc.rb', line 46
# Lower-cases +s+ and blanks out everything that should not be counted:
# words containing an apostrophe, quotation marks, lone characters, the
# built-in ignore words and the entries of @stopwords.
#
# @param s [String] raw text
# @return [String] the cleaned text; split it on whitespace to get the words
def words(s)
  # words source: http://norvig.com/mayzner.html
  ignorewords = %w[
    the of and to in a is that for it as was with be by on not he i this
    are or his from at which but have an had they you were their one all
    we can her has there been if more when will would who so no
  ]

  # Regexp.union escapes every entry, so a stop word that contains a regexp
  # metacharacter is matched literally instead of corrupting the pattern.
  common_words = /\b#{Regexp.union(ignorewords + @stopwords)}\b/

  s.downcase
   .gsub(/\w+'\w+/, '')                  # remove words containing an apostrophe
   .gsub(/["']/, '')                     # remove quotation marks
   .gsub(/\W[^a-z|#]*(\w+)/, ' \1 ')     # drop non-alphabetical characters in
                                         # front of a word and pad it with spaces
   .gsub(/\s.\s/, ' ')                   # remove any lone character between
                                         # whitespace (not only digits)
   .gsub(common_words, '')               # ignore common words
   .gsub(/\B[^\w#]\B/, '')               # remove any other items which are not
                                         # words or hashtags; the anchor needs no
                                         # quantifier, it is zero-width
end