Class: Maxwell::Base
- Inherits:
  - Object
- Inheritance chain:
  - Object
  - Maxwell::Base
- Defined in:
- lib/maxwell.rb
Class Method Summary
- .attr_accessor(*attrs) ⇒ Object
- .attrs ⇒ Object
- .concurrency(value) ⇒ Object
- .execute(args) ⇒ Object
- .javascript(value) ⇒ Object
Instance Method Summary
- #handler(result) ⇒ Object
- #parser(html) ⇒ Object
- #result ⇒ Object
Class Method Details
.attr_accessor(*attrs) ⇒ Object
33 34 35 36 37 |
# File 'lib/maxwell.rb', line 33

# Records the declared attribute names in the receiver's @attrs list, then
# delegates to the inherited attr_accessor with the same arguments.
#
# @param attrs [Array<Symbol, String>] attribute names to declare
# @return [Object] whatever the inherited attr_accessor returns
def attr_accessor(*attrs)
  # Lazily create the registry on first use, then append in place.
  (@attrs ||= []).concat(attrs)
  super
end
.attrs ⇒ Object
39 40 41 |
# File 'lib/maxwell.rb', line 39

# Returns the attribute names registered for this class.
#
# Looks for an @attrs list on the receiver first, then on each superclass in
# turn, so a class several levels below the declaring class still sees the
# list. The nearest class that has an @attrs list wins; lists are not merged.
#
# @return [Array] the registered attribute names, or an empty array when no
#   class in the chain has registered any
def attrs
  klass = self
  while klass
    # instance_variable_get avoids evaluating a string just to read an ivar.
    registered = klass.instance_variable_get(:@attrs)
    return registered if registered

    klass = klass.superclass
  end
  []
end
.concurrency(value) ⇒ Object
47 48 49 |
# File 'lib/maxwell.rb', line 47

# Stores the given value in @concurrency on the receiver.
#
# @param value [Object] the value to store
# @return [Object] the stored value
def concurrency(value)
  @concurrency = value
end
.execute(args) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/maxwell.rb', line 13

# Processes every entry of args[:urls] through a fresh instance of the
# receiver, using Parallel.map_with_index with `in_threads` set to
# @concurrency (or 1 when that is unset).
#
# For each URL it prints a progress line, fetches HTML through
# Maxwell::Converter.call, passes it to the instance's #parser, builds a hash
# of { url: ... } merged with the instance's #result, and hands that hash to
# the instance's #handler.
#
# @param args [Hash] must contain a non-nil :urls entry
# @return [Object] the value returned by Parallel.map_with_index; each block
#   invocation yields the merged result hash
# @raise [RuntimeError] when args[:urls] is nil
def execute(args)
  targets = args[:urls]
  raise 'You need pass an argument urls: or raw_htmls:' if targets.nil?

  Parallel.map_with_index(targets, in_threads: @concurrency || 1) do |target, index|
    puts "\e[34m[#{index + 1}] scraping: #{target}\e[0m"
    worker = new
    worker.parser(Maxwell::Converter.call(target, @use_poltergeist))
    record = { url: target }.merge(worker.result)
    worker.handler(record)
    # The merged hash, not the handler's return value, is the block result.
    record
  end
end
.javascript(value) ⇒ Object
43 44 45 |
# File 'lib/maxwell.rb', line 43

# Stores the given value in @use_poltergeist on the receiver.
#
# @param value [Object] the value to store
# @return [Object] the stored value
def javascript(value)
  @use_poltergeist = value
end
Instance Method Details
#handler(result) ⇒ Object
56 57 58 |
# File 'lib/maxwell.rb', line 56

# Prints the inspected form of the given result to standard output.
#
# @param result [Object] the value to print
# @return [Object] the same value, as returned by Kernel#p
def handler(result)
  p(result)
end
#parser(html) ⇒ Object
52 53 54 |
# File 'lib/maxwell.rb', line 52

# Placeholder implementation that always raises, signalling that the class
# has not supplied its own #parser.
#
# @param html [Object] unused by this implementation
# @raise [NoParserDefinedErr] always
def parser(html)
  # The comma is required: without it Ruby parses `NoParserDefinedErr "..."`
  # as a call to a method named NoParserDefinedErr and raises NoMethodError.
  # NOTE(review): NoParserDefinedErr is not defined in the visible source —
  # presumably declared elsewhere in the library; confirm.
  raise NoParserDefinedErr, "You need to define #{self.class}#parser"
end
#result ⇒ Object
60 61 62 |
# File 'lib/maxwell.rb', line 60

# Builds a hash mapping each attribute name reported by the class-level
# `attrs` to the value obtained by sending that name to this instance.
#
# @return [Hash] attribute name => current value; empty when the class
#   reports no attributes
def result
  # Array() turns a nil attribute list into an empty one instead of raising
  # NoMethodError on nil.
  Array(self.class.attrs).each_with_object({}) do |name, values|
    values[name] = send(name)
  end
end