Class: IMW::Parsers::HtmlParser
- Inherits:
-
Object
- Object
- IMW::Parsers::HtmlParser
- Includes:
- HtmlMatchers
- Defined in:
- lib/titi/ignore/html_parser.rb
Instance Attribute Summary collapse
-
#parse_tree ⇒ Object
Returns the value of attribute parse_tree.
Class Method Summary collapse
-
.attr(selector, attr, matcher = nil) ⇒ Object
Match the attr attribute of the first element given by selector.
-
.href(selector, matcher = nil) ⇒ Object
Shorthand for attr(selector, 'href', matcher).
-
.one(selector, matcher) ⇒ Object
one("hpricot_path"): first match to hpricot_path. one("hpricot_path", /spec/): applies spec to the first match to hpricot_path.
-
.parser_spec ⇒ Object
See IMW::HtmlParser for syntax.
- .proc(selector, proc, matcher = nil) ⇒ Object
- .re(selector, re) ⇒ Object
- .re_all(selector, re, matcher = nil) ⇒ Object
- .re_group(selector, re) ⇒ Object
-
.src(selector, matcher = nil) ⇒ Object
Shorthand for attr(selector, 'src', matcher).
- .strip(selector, matcher = nil) ⇒ Object
- .to_json(selector, matcher = nil) ⇒ Object
-
.to_num(selector, matcher = nil) ⇒ Object
Strips "," characters (note: this ignores locale-specific number formatting) and converts to int.
Instance Method Summary collapse
-
#initialize(arg_spec = nil) ⇒ HtmlParser
constructor
Parse Tree.
-
#parse(doc) ⇒ Object
Walk.
Constructor Details
#initialize(arg_spec = nil) ⇒ HtmlParser
Parse Tree
205 206 207 208 |
# File 'lib/titi/ignore/html_parser.rb', line 205 def initialize arg_spec=nil spec = arg_spec || self.class.parser_spec self.parse_tree = IMW::Parsers::HtmlMatchers.build_parse_tree(spec) end |
Instance Attribute Details
#parse_tree ⇒ Object
Returns the value of attribute parse_tree.
200 201 202 |
# File 'lib/titi/ignore/html_parser.rb', line 200 def parse_tree @parse_tree end |
Class Method Details
.attr(selector, attr, matcher = nil) ⇒ Object
Match the attr attribute of the first element given by selector.
232 233 234 |
# File 'lib/titi/ignore/html_parser.rb', line 232 def self.attr selector, attr, matcher=nil MatchAttribute.new(selector, attr, IMW::Parsers::HtmlMatchers.build_parse_tree(matcher)) end |
.href(selector, matcher = nil) ⇒ Object
Shorthand for attr(selector, 'href', matcher).
236 237 238 |
# File 'lib/titi/ignore/html_parser.rb', line 236 def self.href selector, matcher=nil self.attr(selector, 'href', matcher) end |
.one(selector, matcher) ⇒ Object
one("hpricot_path"): first match to hpricot_path. one("hpricot_path", /spec/): applies spec to the first match to hpricot_path.
228 229 230 |
# File 'lib/titi/ignore/html_parser.rb', line 228 def self.one selector, matcher MatchFirstElement.new(selector, IMW::Parsers::HtmlMatchers.build_parse_tree(matcher)) end |
.parser_spec ⇒ Object
See IMW::HtmlParser for syntax
214 215 216 |
# File 'lib/titi/ignore/html_parser.rb', line 214 def self.parser_spec raise "Override this to create your own parser spec" end |
.proc(selector, proc, matcher = nil) ⇒ Object
244 245 246 |
# File 'lib/titi/ignore/html_parser.rb', line 244 def self.proc selector, proc, matcher=nil MatchProc.new(selector, proc, IMW::Parsers::HtmlMatchers.build_parse_tree(matcher)) end |
.re(selector, re) ⇒ Object
264 265 266 |
# File 'lib/titi/ignore/html_parser.rb', line 264 def self.re selector, re MatchRegexp.new(selector, re, nil, :capture => 1) end |
.re_all(selector, re, matcher = nil) ⇒ Object
267 268 269 |
# File 'lib/titi/ignore/html_parser.rb', line 267 def self.re_all selector, re, matcher=nil MatchRegexpRepeatedly.new(selector, re) end |
.re_group(selector, re) ⇒ Object
261 262 263 |
# File 'lib/titi/ignore/html_parser.rb', line 261 def self.re_group selector, re MatchRegexp.new(selector, re) end |
.src(selector, matcher = nil) ⇒ Object
Shorthand for attr(selector, 'src', matcher).
240 241 242 |
# File 'lib/titi/ignore/html_parser.rb', line 240 def self.src selector, matcher=nil self.attr(selector, 'src', matcher) end |
.strip(selector, matcher = nil) ⇒ Object
257 258 259 |
# File 'lib/titi/ignore/html_parser.rb', line 257 def self.strip selector, matcher=nil proc selector, lambda{|v| v.strip }, matcher end |
.to_json(selector, matcher = nil) ⇒ Object
253 254 255 |
# File 'lib/titi/ignore/html_parser.rb', line 253 def self.to_json selector, matcher=nil proc selector, lambda{|v| v.to_json if v }, matcher end |
.to_num(selector, matcher = nil) ⇒ Object
Strips "," characters (note: this ignores locale-specific number formatting) and converts to int.
250 251 252 |
# File 'lib/titi/ignore/html_parser.rb', line 250 def self.to_num selector, matcher=nil proc selector, lambda{|num| num.to_s.gsub(/,/,'').to_i if num }, matcher end |
Instance Method Details
#parse(doc) ⇒ Object
Walk
221 222 223 |
# File 'lib/titi/ignore/html_parser.rb', line 221 def parse doc self.parse_tree.match(doc) end |