Module: Raspar
- Defined in:
- lib/raspar.rb,
lib/raspar/parser.rb,
lib/raspar/result.rb,
lib/raspar/version.rb,
lib/raspar/dynamic_parser.rb more...
Defined Under Namespace
Modules: Parser
Classes: DynamicParser, RasparException, Result
Constant Summary
collapse
- VERSION =
"0.0.2"
Class Method Summary
collapse
Class Method Details
[View source]
24
25
26
|
# File 'lib/raspar.rb', line 24
# Resets the parser registry to a brand-new, empty Hash.
#
# @return [Hash] the freshly assigned, empty registry
def _init
  @parsers = Hash.new
end
|
permalink
.add(url, selector_map = nil, helper_module = nil, &block) ⇒ Object
[View source]
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
# File 'lib/raspar.rb', line 65
# Defines and registers a parser for the site identified by +url+.
#
# With a +selector_map+ the work is delegated to DynamicParser.register.
# Otherwise an anonymous class is created, Raspar is included into it, the
# optional block is evaluated in the class body, and the class is stored as a
# constant under Raspar named after the host
# ("www.my-site.com" => "MysiteCom").
#
# @param url [String] URL (or bare domain) the parser is responsible for
# @param selector_map [Object, nil] when given, passed to DynamicParser.register
# @param helper_module [Object, nil] forwarded to DynamicParser.register
# @yield optional class body for the generated parser class
# @return [Object] result of DynamicParser.register, or the generated class
# @raise [RasparException] if exist?(url) reports a parser for this url
# @raise [NameError] from const_set when the derived name is not a valid
#   constant name (e.g. a host that starts with a digit)
def add(url, selector_map = nil, helper_module = nil, &block)
  raise RasparException, "Parser already exist for '#{url}'" if exist?(url)

  return DynamicParser.register(url, selector_map, helper_module) if selector_map

  # URI#host is nil for scheme-less input such as 'test.com'; fall back to
  # the raw string, like register does, instead of failing on nil.split.
  host = URI(url).host || url
  klass_name = host.split('.')
                   .reject { |part| part == 'www' }
                   .map { |part| part[0].upcase + part[1..-1] }
                   .join
                   .gsub(/\W/, '')

  klass = Class.new
  klass.send :include, Raspar
  # NOTE(review): `domain` is expected to come from including Raspar and
  # presumably registers klass for url - confirm against lib/raspar.rb.
  klass.domain(url)
  klass.class_exec(&block) if block_given?
  # const_set returns the value it stored, so the new class is the result.
  Raspar.const_set(klass_name, klass)
end
|
permalink
.clear_parser_list ⇒ Object
[View source]
39
40
41
|
# File 'lib/raspar.rb', line 39
# Forgets every registered parser by swapping in a new, empty registry.
# A registry Hash obtained earlier is left untouched (it is replaced, not
# emptied in place).
#
# @return [Hash] the new empty registry
def clear_parser_list
  empty_registry = {}
  @parsers = empty_registry
end
|
permalink
.exist?(url) ⇒ Boolean
[View source]
51
52
53
|
# File 'lib/raspar.rb', line 51
# Tells whether a parser is registered for the host of +url+.
#
# @param url [String] full URL or bare domain
# @return [Boolean] true when the registry has an entry for the host,
#   false otherwise (including when no registry has been created yet)
def exist?(url)
  # register stores scheme-less domains under the raw string (URI#host is nil
  # for them), so the lookup has to fall back the same way.
  host = URI(url).host || url
  @parsers ? @parsers.key?(host) : false
end
|
permalink
.parse(url, html) ⇒ Object
[View source]
55
56
57
58
59
60
61
62
63
|
# File 'lib/raspar.rb', line 55
# Runs the parser registered for the host of +url+ over +html+ and groups
# the parsed items by their +name+.
#
# @param url [String] full URL or bare domain used to pick the parser
# @param html [Object] document handed to the parser's +parse+ method
# @return [Hash, nil] parsed items grouped by +name+, or nil (after printing
#   a notice to stdout) when no parser is registered for the host
def parse(url, html)
  # Same key derivation as register: URI#host is nil for scheme-less input,
  # in which case the raw string is the registry key.
  host = URI(url).host || url
  # Tolerate a registry that has not been created yet.
  parser = @parsers && @parsers[host]
  unless parser
    puts "No parser define for #{host}"
    return nil
  end

  parser.parse(html).group_by(&:name)
end
|
[View source]
47
48
49
|
# File 'lib/raspar.rb', line 47
# Returns the live parser registry (host => parser).
#
# The registry is created on first access, mirroring the lazy
# initialisation in register, so callers never receive nil.
#
# @return [Hash] the registry itself, not a copy
def parsers
  @parsers ||= {}
end
|
permalink
.register(domain, klass) ⇒ Object
Registers a parser class for a domain.
Example:
Raspar.register('test.com', TestParser)
[View source]
33
34
35
36
|
# File 'lib/raspar.rb', line 33
# Stores +klass+ as the parser for +domain+, creating the registry on first
# use.
#
# @param domain [String] full URL or bare domain; the URI host is used as the
#   key, or the string itself when URI reports no host
# @param klass [Object] parser to associate with the host
# @return [String] the key under which the parser was stored
def register(domain, klass)
  @parsers ||= {}
  key = URI(domain).host || domain
  @parsers[key] = klass
  key
end
|
permalink
.remove(domain) ⇒ Object
[View source]
43
44
45
|
# File 'lib/raspar.rb', line 43
# Unregisters the parser for +domain+.
#
# @param domain [String] full URL or bare domain
# @return [Object, nil] the removed parser, or nil when nothing was
#   registered for the host or no registry exists yet
def remove(domain)
  return unless @parsers

  # Use the same key register uses: the URI host, or the raw string when the
  # input has no scheme and URI#host is therefore nil.
  @parsers.delete(URI(domain).host || domain)
end
|