Module: LCBO::CrawlKit::TagHelper
- Defined in:
- lib/lcbo/crawlkit/tag_helper.rb
Constant Summary collapse
- DELETION_RE =
/\"|\\|\/|\(|\)|\[|\]|\./
- WHITESPACE_RE =
/\*|\+|\&|\_|\,|\s/
Class Method Summary collapse
Class Method Details
.[](*values) ⇒ Object
47 48 49 50 |
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 47
# Builds the list of tags for the given values.
#
# Returns an empty array when every value is blank (empty after `to_s` and
# `strip`). Otherwise the values are passed through `flatten` and the
# resulting string through `split`.
def self.[](*values)
  all_blank = values.all? { |value| value.to_s.strip.empty? }
  return [] if all_blank

  normalized = flatten(values)
  split(normalized)
end
.flatten(values) ⇒ Object
7 8 9 10 11 12 |
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 7
# Collapses (possibly nested) values into one normalized string.
#
# Steps, in order:
#   1. flatten the values and join them with single spaces;
#   2. downcase the result via TitleCaseHelper.downcase;
#   3. remove every match of DELETION_RE;
#   4. replace every match of WHITESPACE_RE with a space;
#   5. strip leading and trailing whitespace.
def self.flatten(values)
  joined = values.flatten.join(' ')
  lowered = TitleCaseHelper.downcase(joined)
  cleaned = lowered.gsub(DELETION_RE, '')
  cleaned.gsub(WHITESPACE_RE, ' ').strip
end
.split(str) ⇒ Object
14 15 16 17 18 19 20 21 |
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 14
# Splits a string into a list of unique tags.
#
# The string and its `to_ascii` form are joined with a space and split on
# whitespace; every word is then expanded through `stem`, and duplicates
# are removed while keeping first-seen order.
#
# NOTE(review): `String#to_ascii` is not core Ruby; presumably it is
# provided by a library loaded elsewhere -- confirm.
def self.split(str)
  combined = [str, str.to_ascii].join(' ')
  words = combined.split
  words.flat_map { |term| stem(term) }.uniq
end
.stem(word) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 23
# Expands a single word into the tag variants it should be findable by.
#
# * A hyphenated word yields each hyphen-separated part, the word itself,
#   and the parts joined together without hyphens.
# * Every variant containing an apostrophe is followed by a copy with the
#   apostrophes removed.
#
# Empty variants (produced by leading or doubled hyphens, or by a word made
# only of hyphens or apostrophes) are dropped so they never become tags, and
# repeated variants (e.g. "foo-" producing "foo" twice) are collapsed,
# keeping first-seen order.
#
# @param word [String] a single whitespace-free word
# @return [Array<String>] the distinct, non-empty variants of the word
def self.stem(word)
  variants =
    if word.include?('-')
      parts = word.split('-')
      parts + [word, parts.join]
    else
      [word]
    end

  tokens = variants.flat_map do |variant|
    variant.include?("'") ? [variant, variant.delete("'")] : [variant]
  end

  tokens.reject(&:empty?).uniq
end