Class: Maxwell::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/maxwell.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.attr_accessor(*attrs) ⇒ Object



33
34
35
36
37
# File 'lib/maxwell.rb', line 33

# Records the given attribute names on the receiver before delegating to the
# inherited attr_accessor implementation.
#
# The names are appended to @attrs, which is created as an empty array the
# first time this method runs on a given receiver.
#
# @param attrs [Array] attribute names to record and define accessors for
# @return [Object] whatever the inherited attr_accessor returns
def attr_accessor(*attrs)
  (@attrs ||= []).concat(attrs)
  # Bare super forwards the same argument list unchanged.
  super
end

.attrs ⇒ Object



39
40
41
# File 'lib/maxwell.rb', line 39

# Returns the attribute names recorded for this class.
#
# The receiver is checked first; if it has recorded nothing, the superclass
# chain is walked upwards and the first recorded list found is returned.
# A class that has recorded its own attributes therefore does not see the
# ones recorded on its ancestors.
#
# @return [Array] the recorded attribute names, or an empty array when
#   neither the receiver nor any of its superclasses has recorded any
def attrs
  klass = self
  while klass
    # Reads the ivar directly instead of evaluating a string on the class.
    recorded = klass.instance_variable_get(:@attrs)
    return recorded if recorded

    klass = klass.superclass
  end
  []
end

.concurrency(value) ⇒ Object



47
48
49
# File 'lib/maxwell.rb', line 47

# Stores the concurrency setting in @concurrency.
#
# .execute passes this value to Parallel as the in_threads option and falls
# back to 1 when it has not been set.
#
# @param value [Object] the setting to store (no validation is performed here)
# @return [Object] the value that was assigned
def concurrency(value)
  @concurrency = value
end

.execute(args) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/maxwell.rb', line 13

# Scrapes every URL in args[:urls] and returns one result hash per URL.
#
# For each URL a fresh instance of the receiver is created, the value returned
# by Maxwell::Converter.call(url, @use_poltergeist) is handed to its #parser,
# and the instance's #result is merged into { url: url }. The merged hash is
# passed to #handler and then becomes that URL's entry in the return value.
#
# Work is dispatched through Parallel.map_with_index with in_threads set to
# @concurrency, or 1 when @concurrency is unset.
#
# @param args [Hash] options; only the :urls key is read here
# @return [Object] whatever Parallel.map_with_index returns for the block
#   (presumably an array of the merged hashes - confirm against Parallel docs)
# @raise [RuntimeError] when args[:urls] is nil
def execute(args)
  urls = args[:urls]
  raise 'You need pass an argument urls: or raw_htmls:' if urls.nil?

  thread_count = @concurrency || 1
  Parallel.map_with_index(urls, in_threads: thread_count) do |url, index|
    # Progress line; index is zero-based, so it is shown as index + 1.
    puts "\e[34m[#{index + 1}] scraping: #{url}\e[0m"

    scraper = new
    scraper.parser(Maxwell::Converter.call(url, @use_poltergeist))

    record = { url: url }.merge(scraper.result)
    scraper.handler(record)
    record
  end
end

.javascript(value) ⇒ Object



43
44
45
# File 'lib/maxwell.rb', line 43

# Stores the given flag in @use_poltergeist.
#
# .execute forwards this value as the second argument to
# Maxwell::Converter.call for every URL.
# NOTE(review): presumably a truthy value enables JavaScript rendering via
# Poltergeist - confirm against Maxwell::Converter.
#
# @param value [Object] the flag to store
# @return [Object] the value that was assigned
def javascript(value)
  @use_poltergeist = value
end

Instance Method Details

#handler(result) ⇒ Object



56
57
58
# File 'lib/maxwell.rb', line 56

# Prints the given result with Kernel#p.
#
# .execute calls this once per URL with the merged result hash.
# NOTE(review): presumably meant to be overridden by subclasses that want to
# persist or post-process the result - confirm intended usage.
#
# @param result [Object] the value to print
# @return [Object] the value returned by Kernel#p
def handler result
  p result
end

#parser(html) ⇒ Object



52
53
54
# File 'lib/maxwell.rb', line 52

# Placeholder parser that always raises; the message asks for a #parser
# definition on the receiver's class.
#
# @param html [Object] the value handed over by .execute (unused here)
# @raise [NoParserDefinedErr] always
def parser(html)
  # The comma matters: without it Ruby parses `NoParserDefinedErr "..."` as a
  # method call and raises NoMethodError instead of the intended error.
  # The class name is interpolated rather than the instance's inspect string.
  raise NoParserDefinedErr, "You need to define #{self.class}#parser"
end

#result ⇒ Object



60
61
62
# File 'lib/maxwell.rb', line 60

# Collects the current value of every attribute recorded on the class.
#
# Each name returned by the class-level attrs is sent to this instance and
# the return value is stored under that name.
#
# @return [Hash] attribute name => current value; empty when the class-level
#   attrs returns nil or an empty list
def result
  # Array() turns a nil attribute list into [] so a class without any
  # recorded attributes yields {} instead of failing on nil.
  Array(self.class.attrs).each_with_object({}) do |name, values|
    values[name] = send(name)
  end
end