Module: Boilerpipe

Defined in:
lib/boilerpipe.rb

Constant Summary collapse

VERSION =
"0.0.4"
BP_DEFAULT_API_URL =
'http://boilerpipe-web.appspot.com/extract'
BP_EXTRACTORS =
[ :ArticleExtractor, :DefaultExtractor, :LargestContentExtractor, :KeepEverythingExtractor, :CanolaExtractor ]
BP_OUTPUT_FORMATS =
[ :html, :htmlFragment, :text, :json, :debug ]

Class Method Summary collapse

Class Method Details

.extract(extract_url, opts = {}) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/boilerpipe.rb', line 20

# Runs +extract_url+ through a Boilerpipe web API endpoint and returns the
# response body.
#
# The resolved option values are stored in the receiver's @output, @extractor
# and @api instance variables before the request is made.
#
# @param extract_url [String] address of the page to extract; it is
#   form-encoded here, so pass it unencoded
# @param opts [Hash] optional overrides; a blank value (nil, false, empty
#   or whitespace-only string) falls back to the default
# @option opts [Symbol, String] :output    defaults to BP_OUTPUT_FORMATS.first
# @option opts [Symbol, String] :extractor defaults to BP_EXTRACTORS.first
# @option opts [String]         :api       defaults to BP_DEFAULT_API_URL
# @return [String] the response body when the request succeeds
# @return [Hash] <tt>{ :error => message }</tt> when the request fails
def self.extract(extract_url, opts = {})
  # Stdlib stand-in for ActiveSupport's #present?: yields the option value,
  # or nil when it is blank, so that `||` can supply the default lazily.
  chosen = lambda do |key|
    value = opts[key]
    blank = if value.is_a?(String)
              value !~ /[^[:space:]]/
            elsif value.respond_to?(:empty?)
              value.empty?
            else
              !value
            end
    blank ? nil : value
  end

  @output    = chosen.call(:output)    || BP_OUTPUT_FORMATS.first
  @extractor = chosen.call(:extractor) || BP_EXTRACTORS.first
  @api       = chosen.call(:api)       || BP_DEFAULT_API_URL

  # Encode every parameter: a target URL carrying its own query string
  # ("?a=1&b=2") would otherwise be split into separate API parameters.
  query = URI.encode_www_form([
    ['url',       extract_url.to_s],
    ['extractor', @extractor],
    ['output',    @output]
  ])

  begin
    # Parsing first restricts the request to URI schemes open-uri supports;
    # a bare Kernel#open would also accept local paths and "|command" strings.
    # The block form closes the underlying IO once the body has been read.
    URI.parse("#{@api}?#{query}").open { |io| io.read }
  rescue StandardError => e
    # StandardError covers network, HTTP and URI failures while letting
    # Interrupt and SystemExit propagate.
    { :error => e.message }
  end
end