Module: Wombat::Processing::Parser
- Included in:
- Crawler
- Defined in:
- lib/wombat/processing/parser.rb
Constant Summary collapse
- HTTP_METHODS =
[:get, :post, :put, :patch, :delete, :head]
Instance Attribute Summary collapse
-
#context ⇒ Object
Returns the value of attribute context.
-
#mechanize ⇒ Object
Returns the value of attribute mechanize.
-
#page ⇒ Object
Returns the value of attribute page.
-
#response_code ⇒ Object
Returns the value of attribute response_code.
Instance Method Summary collapse
Instance Attribute Details
#context ⇒ Object
Returns the value of attribute context.
27 28 29 |
# File 'lib/wombat/processing/parser.rb', line 27
def context
  @context
end
#mechanize ⇒ Object
Returns the value of attribute mechanize.
27 28 29 |
# File 'lib/wombat/processing/parser.rb', line 27
def mechanize
  @mechanize
end
#page ⇒ Object
Returns the value of attribute page.
27 28 29 |
# File 'lib/wombat/processing/parser.rb', line 27
def page
  @page
end
#response_code ⇒ Object
Returns the value of attribute response_code.
27 28 29 |
# File 'lib/wombat/processing/parser.rb', line 27
def response_code
  @response_code
end
Instance Method Details
#initialize ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/wombat/processing/parser.rb', line 29
def initialize
  # http://stackoverflow.com/questions/6918277/ruby-mechanize-web-scraper-library-returns-file-instead-of-page
  @mechanize = Mechanize.new { |a|
    a.post_connect_hooks << lambda { |_,_,response,_|
      if response.content_type.nil? || response.content_type.empty?
        response.content_type = 'text/html'
      end
    }
  }
  @mechanize.set_proxy(*Wombat.proxy_args) if Wombat.proxy_args
  @mechanize.user_agent = Wombat.user_agent if Wombat.user_agent
  @mechanize.user_agent_alias = Wombat.user_agent_alias if Wombat.user_agent_alias
end
#parse(metadata, url = nil) ⇒ Object
43 44 45 46 47 |
# File 'lib/wombat/processing/parser.rb', line 43
def parse(metadata, url=nil)
  @context = parser_for(metadata, url)
  Wombat::Property::Locators::Factory.locator_for(metadata).locate(@context, @mechanize)
end