Module: Wombat::Processing::Parser

Included in:
Crawler
Defined in:
lib/wombat/processing/parser.rb

Constant Summary collapse

# Symbols naming the HTTP request methods listed for this module.
HTTP_METHODS =
%i[get post put patch delete head]

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#context ⇒ Object

Returns the value of attribute context.



27
28
29
# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +context+ attribute.
#
# @return [Object] the current value of @context, which #parse assigns
#   from the result of parser_for
def context
  @context
end

#mechanize ⇒ Object

Returns the value of attribute mechanize.



27
28
29
# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +mechanize+ attribute.
#
# @return [Object] the current value of @mechanize; #initialize stores
#   the object returned by Mechanize.new there
def mechanize
  @mechanize
end

#page ⇒ Object

Returns the value of attribute page.



27
28
29
# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +page+ attribute.
#
# @return [Object] the current value of @page
def page
  @page
end

#response_code ⇒ Object

Returns the value of attribute response_code.



27
28
29
# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +response_code+ attribute.
#
# @return [Object] the current value of @response_code
def response_code
  @response_code
end

Instance Method Details

#initialize ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/wombat/processing/parser.rb', line 29

# Creates the Mechanize agent kept in @mechanize and applies the Wombat-level
# proxy and user-agent settings whenever they are set.
def initialize
  # A response that reports no content type is labelled as HTML; see
  # http://stackoverflow.com/questions/6918277/ruby-mechanize-web-scraper-library-returns-file-instead-of-page
  html_fallback = lambda do |_, _, response, _|
    declared_type = response.content_type
    response.content_type = 'text/html' if declared_type.nil? || declared_type.empty?
  end

  @mechanize = Mechanize.new do |agent|
    agent.post_connect_hooks << html_fallback
  end

  proxy = Wombat.proxy_args
  @mechanize.set_proxy(*proxy) if proxy

  agent_name = Wombat.user_agent
  @mechanize.user_agent = agent_name if agent_name

  agent_alias = Wombat.user_agent_alias
  @mechanize.user_agent_alias = agent_alias if agent_alias
end

#parse(metadata, url = nil) ⇒ Object



43
44
45
46
47
# File 'lib/wombat/processing/parser.rb', line 43

# Builds a parsing context for +metadata+ and runs the matching locator
# against it.
#
# @param metadata [Object] handed to parser_for and to
#   Wombat::Property::Locators::Factory.locator_for
# @param url [Object, nil] forwarded unchanged to parser_for; defaults to nil
# @return [Object] whatever the selected locator's #locate returns
def parse(metadata, url = nil)
  # Stored in @context, which the +context+ reader exposes after the call.
  @context = parser_for(metadata, url)

  locator = Wombat::Property::Locators::Factory.locator_for(metadata)
  locator.locate(@context, @mechanize)
end