Class: Stratagem::Crawler::SiteModel::Page
Constant Summary
Constants included
from HtmlUtils
HtmlUtils::INPUT_BUTTON, HtmlUtils::INPUT_RADIO, HtmlUtils::INPUT_TEXT, HtmlUtils::INPUT_TOGGLE
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from HtmlUtils
#find_login_form, #parse_forms
Constructor Details
#initialize(site_model, controller, request, response, invocations, model_changes, &block) ⇒ Page
Returns a new instance of Page.
15
16
17
18
19
20
21
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 15
# Builds a page from one crawled request/response pair.
#
# site_model    - stored as @site_model; the edge queries read its #edges.
# controller    - may be nil; asked for authenticity_checked? only when it
#                 publicly responds to it.
# request       - forwarded to #init.
# response      - forwarded to #init.
# invocations   - stored as @invocations; #export maps over them.
# model_changes - stored as @model_changes; #export serialises them.
# block         - forwarded to #init, which calls it with the redirect URL.
def initialize(site_model, controller, request, response, invocations, model_changes, &block)
  @site_model = site_model
  @invocations = invocations
  @model_changes = model_changes
  # respond_to? is false for nil and for non-public methods, so the call
  # below can never hit a missing/protected method. It also avoids building
  # the full method list (whose element type varies between Ruby versions).
  # When the controller cannot answer, the page is treated as checked.
  @authenticity_checked = controller.respond_to?(:authenticity_checked?) ? controller.authenticity_checked? : true
  init(request, response, &block)
end
|
Instance Attribute Details
Returns the value of attribute document.
12
13
14
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 12
# Reader for @document, which #init assigns from Nokogiri::HTML(body).
# The assignment is skipped when that call raises, so this can be nil.
def document
@document
end
|
Returns the value of attribute method.
10
11
12
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 10
# Reader for @method, which #init copies from request.method.
# Note: this shadows Object#method on Page instances.
def method
@method
end
|
Returns the value of attribute path.
9
10
11
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 9
# Reader for @path, which #init copies from request.path.
def path
@path
end
|
#redirected_to ⇒ Object
Returns the value of attribute redirected_to.
11
12
13
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 11
# Reader for @redirected_to. #init assigns it (through the writer) to the
# value returned by the caller's block, and only when response.redirect?
# is true; otherwise it is left untouched.
def redirected_to
@redirected_to
end
|
Returns the value of attribute response.
6
7
8
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 6
# Reader for @response, the clone of the response taken in #init.
def response
@response
end
|
#response_body ⇒ Object
Returns the value of attribute response_body.
13
14
15
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 13
# Reader for @response_body: the body as #init left it after processing
# (blanked for the 500/400/302 codes, joined and truncated when it was an
# Array, otherwise unchanged).
def response_body
@response_body
end
|
Returns the value of attribute url.
8
9
10
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 8
# Reader for @url, which #init copies from request.url.
def url
@url
end
|
Instance Method Details
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 27
# Serialises this page into a plain Hash.
#
# Object ids stand in as external identifiers for the page itself, the page
# it redirected to, the recognised route and each changed model. Model
# changes and request parameters are embedded as JSON strings.
#
# Returns the Hash.
def export
  target = redirected_to
  target_id = target.object_id if target

  matched_route = route
  route_id = matched_route.object_id if matched_route

  references = @invocations.map { |invocation| invocation.to_reference.export }
  changes_by_model_id = Hash[@model_changes.map { |model, changes| [model.object_id, changes] }]

  {
    :external_id => object_id,
    :url => url,
    :path => path,
    :request_method => method,
    :redirected_to_page_external_id => target_id,
    :route_external_id => route_id,
    :references_attributes => references,
    :model_changes => changes_by_model_id.to_json,
    :authenticity_checked => @authenticity_checked,
    :parameters => @request.parameters.to_json,
    :response_body => @response_body,
    :response_code => @response.code
  }
end
|
82
83
84
85
86
87
88
89
90
91
92
93
94
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 82
# Forms found in the parsed document, memoised in @forms.
#
# Each form returned by parse_forms gets its page set to this page. When
# there is no parsed document the result is an empty Array.
def forms
  return @forms if @forms

  parsed = []
  if @document
    parsed = parse_forms(@document)
    parsed.each { |form| form.page = self }
  end
  @forms = parsed
end
|
#inbound_edges(type = nil) ⇒ Object
108
109
110
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 108
# Edges of the site model that point at this page.
#
# type - when given, only edges whose type equals it are returned;
#        nil (the default) keeps edges of every type.
def inbound_edges(type=nil)
  @site_model.edges.select do |edge|
    next false unless edge.to == self
    type.nil? || type == edge.type
  end
end
|
#init(request, response, &block) ⇒ Object
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 45
# (Re)populates this page from a request/response pair. Called by the
# constructor and again by #reload.
#
# request  - must respond to clone, url, path and method.
# response - must respond to clone, code, body, redirect? and redirect_url.
# block    - called with response.redirect_url when response.redirect? is
#            true; its return value is stored through redirected_to=.
#
# A body that cannot be parsed is reported on stdout and leaves @document
# nil; @response_body is still recorded.
def init(request, response, &block)
  @request = request.clone
  @response = response.clone
  @url = request.url
  @path = request.path
  @method = request.method

  # Drop everything derived from a previous document so that a reload never
  # serves a stale parse or stale memoised forms/title.
  @document = nil
  @forms = nil
  @title = nil

  body = response.body
  begin
    # NOTE(review): this compares against Integer codes; if response.code
    # is a String the branch never matches - confirm the response type.
    if [500, 400, 302].include?(response.code)
      body = ''
    elsif body.kind_of?(Array)
      # Only Array bodies are joined and capped at 20000 characters;
      # any other body is passed to the parser untouched.
      body = body.join
      body = body.slice(0, 20000)
    end
    @document = Nokogiri::HTML(body)
  rescue => e
    puts "ERROR: Could not parse html: #{e.message} - #{body}"
  end
  @response_body = body
  self.redirected_to = block.call(response.redirect_url) if response.redirect?
end
|
96
97
98
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 96
# Hands the parsed document to find_login_form and returns whatever that
# helper returns.
def login_form
  find_login_form(@document)
end
|
#outbound_edges(type = nil) ⇒ Object
104
105
106
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 104
# Edges of the site model that start at this page.
#
# type - when given, only edges whose type equals it are returned;
#        nil (the default) keeps edges of every type.
def outbound_edges(type=nil)
  @site_model.edges.select do |edge|
    next false unless edge.from == self
    type.nil? || type == edge.type
  end
end
|
#redirected? ⇒ Boolean
78
79
80
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 78
# True when a redirect target has been recorded for this page,
# i.e. redirected_to is anything other than nil.
def redirected?
  target = redirected_to
  !target.nil?
end
|
#reload(&block) ⇒ Object
71
72
73
74
75
76
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 71
# Re-fetches this page. Yields the page URL to the caller's block, which
# must return a [request, response] pair, and re-initialises the page from
# it. The block handed to #init ignores the redirect URL and returns nil.
def reload(&block)
  fresh_request, fresh_response = yield(url)
  init(fresh_request, fresh_response) { |_redirect_url| nil }
end
|
23
24
25
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 23
# The route the application's route set recognises for this page,
# memoised in @route once a truthy result has been obtained.
def route
  return @route if @route

  @route = Stratagem::Model::Application.instance.routes.recognize(self)
end
|
112
113
114
115
116
117
118
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 112
# Inner HTML of the first 'head title' node of the parsed document,
# memoised in @title. Returns the current @title (normally nil) when there
# is no document or no such node.
def title
  return @title if @title || !@document

  node = (@document / 'head title').first
  @title = node.inner_html if node
  @title
end
|
100
101
102
|
# File 'lib/stratagem/crawler/site_model/page.rb', line 100
# Serialises the parsed document back to HTML.
#
# Returns nil when there is no parsed document (#init leaves @document
# unset if parsing raised), matching how #forms and #title tolerate a
# missing document instead of raising NoMethodError.
def to_html
  @document && @document.to_html
end
|