Module: Wombat::Processing::Parser

Included in: Crawler

Defined in: lib/wombat/processing/parser.rb

Constant Summary collapse

# HTTP request method names, as symbols. Frozen so that code including this
# module cannot mutate the shared list in place.
HTTP_METHODS = [:get, :post, :put, :patch, :delete, :head].freeze

Instance Attribute Summary collapse

#context ⇒ Object

Returns the value of attribute context.
#mechanize ⇒ Object

Returns the value of attribute mechanize.
#page ⇒ Object

Returns the value of attribute page.
#response_code ⇒ Object

Returns the value of attribute response_code.

Instance Method Summary collapse

Instance Attribute Details

#context ⇒ `Object`

Returns the value of attribute context.



27
28
29

# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +@context+ instance variable. +#parse+ assigns it the result
# of +parser_for(metadata, url)+; it reads as +nil+ before any assignment.
#
# @return [Object] the current value of +@context+
def context
  @context
end

#mechanize ⇒ `Object`

Returns the value of attribute mechanize.



27
28
29

# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +@mechanize+ instance variable, which +#initialize+ sets to
# the +Mechanize+ agent it builds.
#
# @return [Object] the current value of +@mechanize+
def mechanize
  @mechanize
end

#page ⇒ `Object`

Returns the value of attribute page.



27
28
29

# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +@page+ instance variable; reads as +nil+ until something
# assigns it.
#
# @return [Object] the current value of +@page+
def page
  @page
end

#response_code ⇒ `Object`

Returns the value of attribute response_code.



27
28
29

# File 'lib/wombat/processing/parser.rb', line 27

# Reader for the +@response_code+ instance variable; reads as +nil+ until
# something assigns it.
#
# @return [Object] the current value of +@response_code+
def response_code
  @response_code
end

Instance Method Details

#initialize ⇒ `Object`

# File 'lib/wombat/processing/parser.rb', line 29

# Builds the Mechanize agent stored in +@mechanize+ and applies the
# Wombat-level proxy and user-agent settings to it.
#
# A post-connect hook is registered that sets the response content type to
# 'text/html' whenever it is nil or empty. Each Wombat setting is applied
# only when it is truthy.
def initialize
  # http://stackoverflow.com/questions/6918277/ruby-mechanize-web-scraper-library-returns-file-instead-of-page
  # Only the third hook argument (the response) is used; the lambda still
  # declares four parameters so its arity matches what the hook is called with.
  default_content_type = lambda do |_first, _second, response, _fourth|
    missing = response.content_type.nil? || response.content_type.empty?
    response.content_type = 'text/html' if missing
  end

  @mechanize = Mechanize.new do |agent|
    agent.post_connect_hooks << default_content_type
  end

  @mechanize.set_proxy(*Wombat.proxy_args) if Wombat.proxy_args
  @mechanize.user_agent = Wombat.user_agent if Wombat.user_agent
  # Applied last, exactly as before, so an alias is set after any explicit user agent.
  @mechanize.user_agent_alias = Wombat.user_agent_alias if Wombat.user_agent_alias
end

#parse(metadata, url = nil) ⇒ `Object`

# File 'lib/wombat/processing/parser.rb', line 43

# Stores the result of +parser_for(metadata, url)+ in +@context+, then asks
# the locator chosen by +Wombat::Property::Locators::Factory+ for +metadata+
# to locate against that context and the Mechanize agent.
#
# @param metadata [Object] passed to both +parser_for+ and the locator factory
# @param url [Object, nil] forwarded to +parser_for+ (defaults to +nil+)
# @return [Object] whatever the locator's +locate+ call returns
def parse(metadata, url = nil)
  @context = parser_for(metadata, url)

  locator = Wombat::Property::Locators::Factory.locator_for(metadata)
  locator.locate(@context, @mechanize)
end

Module: Wombat::Processing::Parser

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#context ⇒ Object

#mechanize ⇒ Object

#page ⇒ Object

#response_code ⇒ Object

Instance Method Details

#initialize ⇒ Object

#parse(metadata, url = nil) ⇒ Object

#context ⇒ `Object`

#mechanize ⇒ `Object`

#page ⇒ `Object`

#response_code ⇒ `Object`

#initialize ⇒ `Object`

#parse(metadata, url = nil) ⇒ `Object`