Class: Husc
- Inherits:
-
Object
show all
- Defined in:
- lib/husc.rb,
lib/husc/version.rb
Defined Under Namespace
Classes: CrawlArray, Error
Constant Summary
collapse
- VERSION =
"0.4.1"
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10) ⇒ Husc
Returns a new instance of Husc.
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
# File 'lib/husc.rb', line 39
# Builds a crawler around a fresh Mechanize agent and loads the initial
# content from exactly one of three sources, checked in this order:
# +url+, then +doc+, then +html+.
#
# @param url [String, nil] when non-nil, fetched immediately via #get
# @param doc [Object, nil] an already-parsed node; used when +url+ is nil.
#   Must respond to +to_html+ (presumably a Nokogiri node — confirm with callers)
# @param html [String, nil] raw markup handed to update_params when neither
#   +url+ nor +doc+ is given
# @param user_agent [String, nil] assigned to the agent's user_agent when non-nil
# @param request_headers [Hash, nil] assigned to the agent's request_headers
#   when non-nil
# @param timeout [Integer] assigned to the agent's read_timeout (default 10)
# @return [Husc]
def initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10)
  @agent = Mechanize.new
  @agent.keep_alive = false
  @agent.user_agent = user_agent unless user_agent.nil?
  @agent.request_headers = request_headers unless request_headers.nil?
  @agent.read_timeout = timeout

  if !url.nil?
    get(url)
  elsif !doc.nil?
    @html = doc.to_html
    @doc = doc
    table_to_hash
  else
    update_params(html)
    @html = html
  end

  # The queue of pending form inputs always starts empty.
  @params = []
end
|
Instance Attribute Details
#code ⇒ Object
Returns the value of attribute code.
12
13
14
|
# File 'lib/husc.rb', line 12
# Reader for @code. In the code visible here, @code is assigned by #get.
#
# @return [Object] the current value of @code (nil until something sets it)
def code
@code
end
|
#html ⇒ Object
Returns the value of attribute html.
12
13
14
|
# File 'lib/husc.rb', line 12
# Reader for @html. The constructor assigns it from doc.to_html or from the
# html: argument; other assignments may exist outside the visible code.
#
# @return [Object] the current value of @html
def html
@html
end
|
#params ⇒ Object
Returns the value of attribute params.
12
13
14
|
# File 'lib/husc.rb', line 12
# Reader for @params, the array of hashes queued by #send. It is reset to an
# empty array by the constructor and after #submit completes.
#
# @return [Array<Hash>] the current value of @params
def params
@params
end
|
#tables ⇒ Object
Returns the value of attribute tables.
12
13
14
|
# File 'lib/husc.rb', line 12
# Reader for @tables.
# NOTE(review): @tables is not assigned anywhere in the visible code;
# presumably table_to_hash populates it — confirm.
#
# @return [Object] the current value of @tables
def tables
@tables
end
|
#url ⇒ Object
Returns the value of attribute url.
12
13
14
|
# File 'lib/husc.rb', line 12
# Reader for @url, which #get sets to the URL it was asked to fetch.
#
# @return [Object] the current value of @url (nil if #get was never called)
def url
@url
end
|
Instance Method Details
#attr(name) ⇒ Object
198
199
200
201
202
203
204
205
206
|
# File 'lib/husc.rb', line 198
# Looks up an attribute on the wrapped node.
#
# @param name [String, Symbol] attribute name, forwarded to @doc.attr
# @return [Object] whatever @doc.attr returns, or '' when it returns nil
def attr(name)
  value = @doc.attr(name)
  # Only nil is replaced; any other value is passed through untouched.
  value.nil? ? '' : value
end
|
#css(locator, single = false) ⇒ Object
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
# File 'lib/husc.rb', line 155
# Runs a CSS query against the wrapped node and wraps every match in a Husc.
#
# @param locator [String] CSS selector forwarded to @doc.css
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all wrapped matches as a CrawlArray; with
#   +single+, the first wrapped match, or an empty CrawlArray if there is none
def css(locator, single = false)
  wrapped = @doc.css(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)
  return matches unless single

  first_match = matches[0]
  first_match == nil ? CrawlArray.new : first_match
end
|
#get(url) ⇒ Object
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/husc.rb', line 61
# Fetches +url+ with the Mechanize agent, records the response code in @code
# and hands the UTF-8 converted body to update_params.
#
# HTTP error responses (Mechanize::ResponseCodeError) are not fatal: the
# status code is stored and the error page itself is parsed. A
# connection-level Net::HTTP::Persistent::Error is printed and the method
# returns nil without parsing anything.
#
# @param url [String] address to fetch; also stored in @url
# @return [Object, nil] the result of update_params, or nil when no page
#   could be obtained
def get(url)
  @url = url
  begin
    page = @agent.get(@url)
    @code = page.code
  rescue Mechanize::ResponseCodeError => e
    # Store the status code (not the body) and keep the error page so its
    # content can still be processed below.
    @code = e.response_code
    page = e.page
  rescue Net::HTTP::Persistent::Error => e
    puts e
  end
  # Without a page there is nothing to convert or parse.
  return if page.nil?

  html = page.content.toutf8
  update_params(html)
end
|
#inner_html(shaping = true) ⇒ Object
180
181
182
183
184
185
186
187
|
# File 'lib/husc.rb', line 180
# Returns the inner HTML of the wrapped node.
#
# @param shaping [Boolean] when truthy, the markup is run through
#   shaping_string first; otherwise it is returned exactly as @doc gives it
# @return [Object] the shaped or raw inner HTML
def inner_html(shaping = true)
  markup = @doc.inner_html
  shaping ? shaping_string(markup) : markup
end
|
#inner_text(shaping = true) ⇒ Object
171
172
173
174
175
176
177
178
|
# File 'lib/husc.rb', line 171
# Returns the inner text of the wrapped node.
#
# @param shaping [Boolean] when truthy, the text is run through
#   shaping_string first; otherwise it is returned exactly as @doc gives it
# @return [Object] the shaped or raw inner text
def inner_text(shaping = true)
  content = @doc.inner_text
  shaping ? shaping_string(content) : content
end
|
#send(opts) ⇒ Object
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/husc.rb', line 76
# Queues one set of form-control criteria for the next #submit call.
#
# A new hash is appended to @params and filled with the pairs from +opts+,
# with every key converted to a Symbol.
#
# Note: this method shadows Object#send for Husc instances; use __send__
# when dynamic dispatch is needed on a Husc.
#
# @param opts [Hash, Array] key/value pairs; keys must respond to +to_sym+
# @return [Hash] a symbol-keyed copy of +opts+ (a different object from the
#   hash stored in @params)
def send(opts)
  queued = {}
  @params.push(queued)
  symbolized = opts.each_with_object({}) { |(key, value), acc| acc[key.to_sym] = value }
  queued.update(symbolized)
  symbolized
end
|
#submit(opts) ⇒ Object
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# File 'lib/husc.rb', line 88
# Fills in and submits a form on the agent's current page, using the
# entries previously queued in @params.
#
# Each queued entry is a hash of lookup criteria plus one action key, which
# is removed before the lookup:
#   :value - set the matching field's value (ignored when :check is present)
#   :check - check (truthy) or uncheck (false) the matching checkbox
#   :file  - set the matching file upload's file name; skipped if the file
#            does not exist
#   :click - when truthy, use the matching button to submit the form
# A nil action value (or a missing file, or a falsy :click) skips the rest
# of that entry. Controls that cannot be found are silently ignored.
#
# @param opts [Integer, Hash] index into the page's forms, or criteria
#   (string or symbol keys) passed to the page's +form+ lookup
# @return [Array, nil] the emptied @params after a submission, or nil when
#   no form matched (in which case @params is left untouched)
def submit(opts)
  # The Integer check must come first: an index has no #map to symbolize.
  form =
    if opts.is_a?(Integer)
      @agent.page.forms[opts]
    else
      criteria = opts.map { |k, v| [k.to_sym, v] }.to_h
      @agent.page.form(**criteria)
    end
  return if form.nil?

  button = nil
  @params.each do |param|
    if param.include?(:value) && !param.include?(:check)
      value = param.delete(:value)
      next if value.nil?

      field = form.field_with(**param)
      field.value = value unless field.nil?
    end

    if param.include?(:check)
      check = param.delete(:check)
      next if check.nil?

      checkbox = form.checkbox_with(**param)
      unless checkbox.nil?
        check ? checkbox.check : checkbox.uncheck
      end
    end

    if param.include?(:file)
      file = param.delete(:file)
      next if file.nil? || !File.exist?(file)

      upload = form.file_upload_with(**param)
      upload.file_name = file unless upload.nil?
    end

    if param.include?(:click)
      click = param.delete(:click)
      next unless click

      # Keep the previously chosen button when this one cannot be found.
      candidate = form.button_with(**param)
      button = candidate unless candidate.nil?
    end
  end

  form = @agent.submit(form, button)
  update_params(form.content.toutf8)
  @params = []
end
|
#text(shaping = true) ⇒ Object
189
190
191
192
193
194
195
196
|
# File 'lib/husc.rb', line 189
# Returns the text of the wrapped node.
#
# @param shaping [Boolean] when truthy, the text is run through
#   shaping_string first; otherwise it is returned exactly as @doc gives it
# @return [Object] the shaped or raw text
def text(shaping = true)
  content = @doc.text
  shaping ? shaping_string(content) : content
end
|
#xpath(locator, single = false) ⇒ Object
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'lib/husc.rb', line 139
# Runs an XPath query against the wrapped node and wraps every match in a Husc.
#
# @param locator [String] XPath expression forwarded to @doc.xpath
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all wrapped matches as a CrawlArray; with
#   +single+, the first wrapped match, or an empty CrawlArray if there is none
def xpath(locator, single = false)
  wrapped = @doc.xpath(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)
  return matches unless single

  first_match = matches[0]
  first_match == nil ? CrawlArray.new : first_match
end
|