Module: Boilerpipe
- Defined in:
- lib/boilerpipe.rb
Constant Summary collapse
- VERSION =
"0.0.4"
- BP_DEFAULT_API_URL =
'http://boilerpipe-web.appspot.com/extract'
- BP_EXTRACTORS =
[ :ArticleExtractor, :DefaultExtractor, :LargestContentExtractor, :KeepEverythingExtractor, :CanolaExtractor ]
- BP_OUTPUT_FORMATS =
[ :html, :htmlFragment, :text, :json, :debug ]
Class Method Summary collapse
Class Method Details
.extract(extract_url, opts = {}) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/boilerpipe.rb', line 20

# Requests an extraction of +extract_url+ from a Boilerpipe web API endpoint
# and returns the raw response body.
#
# @param extract_url [String] address of the page to extract; it is
#   interpolated into the query string exactly as given.
# @param opts [Hash] optional overrides:
#   :output    - output format (default: BP_OUTPUT_FORMATS.first)
#   :extractor - extractor name (default: BP_EXTRACTORS.first)
#   :api       - endpoint URL (default: BP_DEFAULT_API_URL)
# @return [String, Hash] the response body on success, or a Hash of the form
#   { :error => message } when the request raises a StandardError.
def self.extract(extract_url, opts = {})
  # NOTE(review): `present?` is not core Ruby (presumably ActiveSupport) —
  # confirm the file requires it.
  # The chosen values are kept in module-level instance variables, as before,
  # so the last call's settings stay observable on the module.
  @output = opts[:output].present? ? opts[:output] : BP_OUTPUT_FORMATS.first
  @extractor = opts[:extractor].present? ? opts[:extractor] : BP_EXTRACTORS.first
  @api = opts[:api].present? ? opts[:api] : BP_DEFAULT_API_URL

  # NOTE(review): the query values are not URL-escaped, so an extract_url that
  # itself contains `&` or `#` will be split by the API. Escaping here would
  # double-encode for callers that already escape — confirm before changing.
  url = [@api, "?url=#{extract_url}", "&extractor=#{@extractor}", "&output=#{@output}"].join

  begin
    # Prefer URI.open where open-uri provides it: Kernel#open on an arbitrary
    # string can spawn a subprocess ("|cmd") and no longer opens URLs on
    # Ruby 3.0+. The block form closes the handle once the body is read.
    if defined?(URI) && URI.respond_to?(:open)
      URI.open(url, &:read)
    else
      open(url, &:read)
    end
  rescue StandardError => e
    # StandardError rather than Exception, so Interrupt/SystemExit still propagate.
    Hash[:error => e.message]
  end
end