Module: WebParser
- Defined in:
- lib/web_parser/web_parser.rb
Constant Summary
- DEFAULT_OPTIONS =
{ 'max_workers'=>5, 'queue'=>{ 'server'=>'127.0.0.1:22122', 'in_queue' => 'in_queue', 'out_queue' => 'out_queue', 'wait_time' => 1 } }
Class Method Summary
- .extract(xhtml, template) ⇒ Object
- .extract_from_file(web_file, template, options = {}) ⇒ Object
- .extract_from_url(url, template, options = {}) ⇒ Object
Class Method Details
.extract(xhtml, template) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/web_parser/web_parser.rb', line 39

# Extracts every object described by +template+ from +xhtml+.
#
# @param xhtml [Object] markup handed to +xhtml_to_dom+
# @param template [Hash, Object] a Hash of object name => attribute hash;
#   anything that is not exactly a Hash is passed to +Template.load_template+
# @return [Hash] object name => extraction result; the result is nil when the
#   attributes' 'type' is none of 'single', 'list' or 'special'
def extract(xhtml, template)
  # A template that is not a Hash is treated as a template file and loaded.
  template = Template.load_template(template) unless template.instance_of?(Hash)
  dom = xhtml_to_dom(xhtml)

  template.each_with_object({}) do |entry, extracted|
    object_name = entry[0]
    object_attributes = entry[1]
    # An unrecognised 'type' falls through the case and stores nil.
    extracted[object_name] =
      case object_attributes['type']
      when 'single'  then extract_single(dom, object_attributes)
      when 'list'    then extract_list(dom, object_attributes)
      when 'special' then extract_special(dom, object_attributes)
      end
  end
end
.extract_from_file(web_file, template, options = {}) ⇒ Object
63 64 65 66 |
# File 'lib/web_parser/web_parser.rb', line 63

# Obtains a document via +get_doc_from_file+ and runs +extract+ on it.
#
# @param web_file [Object] passed to +get_doc_from_file+ (presumably a file
#   path; confirm against that helper)
# @param template [Hash, Object] forwarded unchanged to +extract+
# @param options [Hash] forwarded unchanged to +get_doc_from_file+
# @return [Object] whatever +extract+ returns
def extract_from_file(web_file, template, options = {})
  doc = get_doc_from_file(web_file, options)
  extract(doc, template)
end
.extract_from_url(url, template, options = {}) ⇒ Object
58 59 60 61 |
# File 'lib/web_parser/web_parser.rb', line 58

# Obtains a document via +get_doc_from_url+ and runs +extract+ on it.
#
# @param url [Object] passed to +get_doc_from_url+ (presumably a URL string;
#   confirm against that helper)
# @param template [Hash, Object] forwarded unchanged to +extract+
# @param options [Hash] forwarded unchanged to +get_doc_from_url+
# @return [Object] whatever +extract+ returns
def extract_from_url(url, template, options = {})
  doc = get_doc_from_url(url, options)
  extract(doc, template)
end