Class: Stratagem::Crawler::SiteModel::Page

Inherits:
Object
  • Object
show all
Includes:
HtmlUtils
Defined in:
lib/stratagem/crawler/site_model/page.rb

Constant Summary

Constants included from HtmlUtils

HtmlUtils::INPUT_BUTTON, HtmlUtils::INPUT_RADIO, HtmlUtils::INPUT_TEXT, HtmlUtils::INPUT_TOGGLE

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from HtmlUtils

#find_login_form, #parse_forms

Constructor Details

#initialize(site_model, controller, request, response, invocations, model_changes, &block) ⇒ Page

Returns a new instance of Page.



15
16
17
18
19
20
21
# File 'lib/stratagem/crawler/site_model/page.rb', line 15

# Builds a Page from one crawled request/response pair.
#
# @param site_model [Object] stored as @site_model; the edge queries read
#   its +edges+ collection
# @param controller [Object, nil] consulted only for +authenticity_checked?+
# @param request [Object] handed to #init
# @param response [Object] handed to #init
# @param invocations [Enumerable] stored as @invocations (mapped in #export)
# @param model_changes [Enumerable] stored as @model_changes (mapped in #export)
# @yield handed to #init, which calls it with the response's redirect URL
def initialize(site_model, controller, request, response, invocations, model_changes, &block)
  @site_model = site_model
  @invocations = invocations
  @model_changes = model_changes
  # respond_to? only reports publicly callable methods, so a controller whose
  # predicate is protected falls back to the default instead of raising
  # NoMethodError; it also avoids building the full method-name list.
  @authenticity_checked =
    if controller && controller.respond_to?(:authenticity_checked?)
      controller.authenticity_checked?
    else
      true # no controller, or no public predicate: treat as checked
    end
  init(request, response, &block)
end

Instance Attribute Details

#document ⇒ Object

Returns the value of attribute document.



12
13
14
# File 'lib/stratagem/crawler/site_model/page.rb', line 12

# Reader for the +document+ attribute.
#
# @return [Object, nil] the value of @document. #init assigns it the result
#   of Nokogiri::HTML(body); it is not assigned when that step raises.
def document
  @document
end

#method ⇒ Object

Returns the value of attribute method.



10
11
12
# File 'lib/stratagem/crawler/site_model/page.rb', line 10

# Reader for the +method+ attribute.
#
# Takes no arguments, so on this class it replaces the one-argument
# Object#method lookup.
#
# @return [Object] the value of @method, which #init copies from request.method
def method
  @method
end

#path ⇒ Object

Returns the value of attribute path.



9
10
11
# File 'lib/stratagem/crawler/site_model/page.rb', line 9

# Reader for the +path+ attribute.
#
# @return [Object] the value of @path, which #init copies from request.path
def path
  @path
end

#redirected_to ⇒ Object

Returns the value of attribute redirected_to.



11
12
13
# File 'lib/stratagem/crawler/site_model/page.rb', line 11

# Reader for the +redirected_to+ attribute.
#
# @return [Object, nil] the value of @redirected_to. #init assigns it (via
#   the writer) the result of its block when response.redirect? is true.
def redirected_to
  @redirected_to
end

#response ⇒ Object (readonly)

Returns the value of attribute response.



6
7
8
# File 'lib/stratagem/crawler/site_model/page.rb', line 6

# Read-only accessor for the +response+ attribute.
#
# @return [Object] the value of @response, which #init sets to response.clone
def response
  @response
end

#response_body ⇒ Object

Returns the value of attribute response_body.



13
14
15
# File 'lib/stratagem/crawler/site_model/page.rb', line 13

# Reader for the +response_body+ attribute.
#
# @return [Object] the value of @response_body, which #init sets to the body
#   it worked with (possibly blanked, or joined and sliced)
def response_body
  @response_body
end

#url ⇒ Object

Returns the value of attribute url.



8
9
10
# File 'lib/stratagem/crawler/site_model/page.rb', line 8

# Reader for the +url+ attribute.
#
# @return [Object] the value of @url, which #init copies from request.url
def url
  @url
end

Instance Method Details

#export ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/stratagem/crawler/site_model/page.rb', line 27

# Flattens this page into a Hash of exportable attributes.
#
# Object ids stand in for external identifiers of the page, its redirect
# target, its route and the changed models. Model changes and request
# parameters are emitted as JSON strings.
#
# @return [Hash{Symbol => Object}]
def export
  redirect_target = redirected_to
  matched_route = route
  references = @invocations.map { |invocation| invocation.to_reference.export }
  changes_by_model_id = Hash[@model_changes.map { |model, changes| [model.object_id, changes] }]

  {
    :external_id => object_id,
    :url => url,
    :path => path,
    :request_method => method,
    :redirected_to_page_external_id => (redirect_target ? redirect_target.object_id : nil),
    :route_external_id => (matched_route ? matched_route.object_id : nil),
    :references_attributes => references,
    :model_changes => changes_by_model_id.to_json,
    :authenticity_checked => @authenticity_checked,
    :parameters => @request.parameters.to_json,
    :response_body => @response_body,
    :response_code => @response.code
  }
end

#forms ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/stratagem/crawler/site_model/page.rb', line 82

# Forms found in the parsed document, memoized in @forms.
#
# Each form returned by parse_forms gets its +page+ set to this page.
# Without a parsed document the result is an empty Array.
#
# @return [Array] the parsed forms, or [] when @document is not set
def forms
  return @forms if @forms

  parsed = @document ? self.parse_forms(@document) : []
  parsed.each { |form| form.page = self }
  @forms = parsed
end

#inbound_edges(type = nil) ⇒ Object



108
109
110
# File 'lib/stratagem/crawler/site_model/page.rb', line 108

# Edges of the site model that end at this page.
#
# @param type [Object, nil] when given, only edges whose +type+ equals it
#   are kept; nil keeps every inbound edge
# @return [Array] the matching edges from @site_model.edges
def inbound_edges(type=nil)
  @site_model.edges.select do |edge|
    next false unless edge.to == self

    type.nil? || type == edge.type
  end
end

#init(request, response, &block) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/stratagem/crawler/site_model/page.rb', line 45

# (Re)populates this page from a request/response pair.
#
# Stores clones of both objects, copies url/path/method from the request,
# derives the body to keep, parses it with Nokogiri and, for redirects,
# asks the block for the page the response points at.
#
# NOTE(review): @forms, @title and @route are memoized by their readers and
# are not cleared here, so after #reload they may still reflect the previous
# document — confirm whether that is intended.
#
# @param request [Object] must respond to clone, url, path and method
# @param response [Object] must respond to clone, body, code, redirect?
#   and redirect_url
# @yieldparam redirect_url [Object] response.redirect_url, only when
#   response.redirect? is true
# @yieldreturn [Object] stored as redirected_to
def init(request, response, &block)
  @request = request.clone
  @response = response.clone
  @url = request.url
  @path = request.path
  @method = request.method
  body = response.body
  begin
    # Compare as strings: many response objects report the status code as a
    # String, which an Integer-only comparison silently never matches.
    if %w[500 400 302].include?(response.code.to_s)
      body = ''
    elsif body.kind_of?(Array)
      # NOTE(review): the 20000-character cap applies to Array bodies only;
      # String bodies are kept at full length — confirm intended.
      body = body.join.slice(0, 20000)
    end

    @document = Nokogiri::HTML(body)
  rescue => e
    # Best effort: a body that cannot be parsed is reported, not fatal.
    puts "ERROR: Could not parse html: #{e.message} - #{body}"
  end

  @response_body = body

  self.redirected_to = block.call(response.redirect_url) if response.redirect?
end

#login_form ⇒ Object



96
97
98
# File 'lib/stratagem/crawler/site_model/page.rb', line 96

# Looks up the login form in this page's parsed document.
#
# The method and helper names were missing from this listing; they are
# restored from the section heading (#login_form) and from the methods
# HtmlUtils is listed as providing (#find_login_form).
#
# @return [Object] whatever find_login_form returns for @document
def login_form
  self.find_login_form(@document)
end

#outbound_edges(type = nil) ⇒ Object



104
105
106
# File 'lib/stratagem/crawler/site_model/page.rb', line 104

# Edges of the site model that start at this page.
#
# @param type [Object, nil] when given, only edges whose +type+ equals it
#   are kept; nil keeps every outbound edge
# @return [Array] the matching edges from @site_model.edges
def outbound_edges(type=nil)
  @site_model.edges.select do |edge|
    next false unless edge.from == self

    type.nil? || type == edge.type
  end
end

#redirected? ⇒ Boolean

Returns:

  • (Boolean)


78
79
80
# File 'lib/stratagem/crawler/site_model/page.rb', line 78

# Whether a redirect target has been recorded for this page.
#
# @return [Boolean] true unless redirected_to is nil
def redirected?
  target = self.redirected_to
  !target.nil?
end

#reload(&block) ⇒ Object



71
72
73
74
75
76
# File 'lib/stratagem/crawler/site_model/page.rb', line 71

# Re-fetches this page's URL and re-initializes the page from the result.
#
# TODO - should support all the verbs and params, but
# hack together for now to reload the authenticity token
#
# @yieldparam url [Object] this page's url
# @yieldreturn [Array] a two-element [request, response] pair
# @return [Object] whatever #init returns
def reload(&block)
  fresh_request, fresh_response = yield(url)
  # The block handed to init does nothing, so a redirect on reload
  # leaves redirected_to set to nil.
  init(fresh_request, fresh_response) do |_redirect_url|
  end
end

#route ⇒ Object



23
24
25
# File 'lib/stratagem/crawler/site_model/page.rb', line 23

# The application route recognized for this page, memoized in @route.
#
# @return [Object] result of
#   Stratagem::Model::Application.instance.routes.recognize(self)
def route
  return @route if @route

  @route = Stratagem::Model::Application.instance.routes.recognize(self)
end

#title ⇒ Object



112
113
114
115
116
117
118
# File 'lib/stratagem/crawler/site_model/page.rb', line 112

# Inner HTML of the first 'head title' node of the document, memoized
# in @title.
#
# @return [Object, nil] the title, or the current @title (normally nil)
#   when there is no document or no matching node
def title
  return @title if @title || !@document

  node = (@document / 'head title').first
  @title = node.inner_html if node
  @title
end

#to_html ⇒ Object



100
101
102
# File 'lib/stratagem/crawler/site_model/page.rb', line 100

# Serializes the parsed document back to HTML.
#
# #init leaves @document unset when parsing raises, so this guards against
# a missing document the same way #forms and #title do, rather than
# raising NoMethodError.
#
# @return [Object, nil] @document.to_html, or nil when there is no document
def to_html
  @document && @document.to_html
end