Class: Socializer::Scraper::Extractor

Inherits:
Object
  • Object
show all
Includes:
Collector
Defined in:
lib/socializer/scraper/extractor.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Collector

#email_collector, #link_collector, #live_link_collector, #sitemap_collector, #social_profile_collector

Constructor Details

#initialize(options = {}) ⇒ Extractor

Returns a new instance of Extractor.



10
11
12
13
# File 'lib/socializer/scraper/extractor.rb', line 10

# Builds an extractor from an options hash.
#
# @param options [Hash] construction options
# @option options [Object] :url (nil) value handed to the url= writer
# @option options [Object] :collectors ([]) value handed to the collectors= writer
def initialize(options = {})
  self.url        = options.fetch(:url) { nil }
  self.collectors = options.fetch(:collectors) { [] }
end

Instance Attribute Details

#collectors ⇒ Object



26
27
28
# File 'lib/socializer/scraper/extractor.rb', line 26

# Lists the collectors this extractor should use.
#
# Returns the configured @collectors when it has at least one truthy entry;
# otherwise falls back to self.class.available_collectors. The fallback also
# applies when @collectors is nil (never assigned, or assigned nil).
#
# @return [Object] the configured collection, or the class-level defaults
def collectors
  configured = @collectors

  # Guard against nil so a missing list selects the defaults instead of
  # raising NoMethodError on nil.any?.
  return configured if configured && configured.any?

  self.class.available_collectors
end

#url ⇒ Object

Returns the value of attribute url.



7
8
9
# File 'lib/socializer/scraper/extractor.rb', line 7

# Reader for the url attribute.
#
# @return [Object] the current value of @url
def url
  @url
end

Class Method Details

.available_collectors ⇒ Object



53
54
55
56
57
58
59
# File 'lib/socializer/scraper/extractor.rb', line 53

# Derives the collector names this class supports from its instance methods:
# every method named "<name>_collector" contributes :<name>.
#
# @return [Array<Symbol>] collector names with the "_collector" suffix removed
def available_collectors
  # \z anchors at the true end of the name (unlike $, which also matches
  # before a newline), and sub removes only that single trailing suffix, so
  # "#{name}_collector" always reconstructs the original method name.
  instance_methods.grep(/_collector\z/).map do |name|
    name.to_s.sub(/_collector\z/, '').to_sym
  end
end

Instance Method Details

#run(*patterns, &block) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/socializer/scraper/extractor.rb', line 30

# Runs each configured collector for every page yielded by perform and
# gathers the values they return.
#
# A trailing options hash is split off the arguments. Its :page_wise entry is
# consumed here; the remaining options are appended back onto the argument
# list that is forwarded to perform.
#
# @param patterns [Array] arguments forwarded to perform, optionally ending
#   with an options hash
# @yield [page, collector, found] invoked once per page/collector pair when a
#   block is given
# @return [Object] whatever hash_map returns for the gathered data
#   (hash_map and hashify_or_collect are defined outside this listing)
def run(*patterns, &block)
  options   = patterns.extract_options!
  page_wise = options.delete(:page_wise)
  gathered  = {}

  # Hand the leftover options back to perform as the final argument.
  patterns << options

  perform(*patterns) do |page|
    collectors.each do |collector|
      found = send("#{collector}_collector")
      yield(page, collector, found) if block_given?

      if page_wise
        # Keyed by @current_url, which is not assigned in this method --
        # presumably maintained by perform; confirm.
        (gathered[collector] ||= {})[@current_url] = found
      else
        (gathered[collector] ||= []) << found
      end
    end
  end

  gathered.hash_map { |kind, list| [kind, list.hashify_or_collect] }
end