Class: Yahoo::TermExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/yahoo_term_extractor.rb

Overview

Yahoo Term Extractor Ruby class. Be warned that Yahoo rate-limits this API to 5000 queries a day! You need an appid from Yahoo to use it.

Constant Summary collapse

API_URL =
URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')

Instance Method Summary collapse

Constructor Details

#initialize(appid) ⇒ TermExtractor

term_extractor = Yahoo::TermExtractor.new("appid")

Raises:

  • (ArgumentError)


16
17
18
19
# File 'lib/yahoo_term_extractor.rb', line 16

# Builds an extractor that sends the given application id with each request.
#
# @param appid [String] the Yahoo application id; must be neither nil nor empty
# @raise [ArgumentError] if +appid+ is nil or empty
def initialize(appid)
  if appid.nil? || appid.empty?
    raise ArgumentError, 'appid must be supplied'
  end

  @appid = appid
end

Instance Method Details

#extract_terms(context, args = {}) ⇒ Object

term_extractor.extract_terms(" Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region.", :query => "leeds")

Raises:

  • (ArgumentError)


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/yahoo_term_extractor.rb', line 22

# Posts +context+ to the term-extraction endpoint (API_URL) and returns the
# terms found in the response.
#
# @param context [String] the text to analyse; must be neither nil nor empty
# @param args [Hash] optional settings
# @option args [String] :query sent as the 'query' form parameter; a nil or
#   empty value is ignored
# @return [Array<String>] the text of every /ResultSet/Result element
# @raise [ArgumentError] if +context+ is nil or empty
# @raise [Yahoo::APIError] if the HTTP status is not 200, if the response is
#   an Error document, or if the response root is neither ResultSet nor Error
def extract_terms(context, args = {})
  # Check nil explicitly so a missing context raises the documented
  # ArgumentError instead of a NoMethodError from nil.empty?.
  raise ArgumentError, 'context must be supplied' if context.nil? || context.empty?

  params = { 'appid' => @appid, 'context' => context }
  # fetch with a nil fallback treats an absent key and an explicit nil alike.
  query = args.fetch(:query, nil)
  params['query'] = query unless query.nil? || query.empty?

  # Network-level exceptions from Net::HTTP are not rescued here; they
  # propagate to the caller unchanged.
  response = Net::HTTP.post_form(API_URL, params)
  unless response.code == '200'
    raise Yahoo::APIError, "#{response.code} received from Yahoo API"
  end

  xml = REXML::Document.new(response.body)
  root = xml.root
  # root is nil when the body holds no element at all; that falls through to
  # the "unknown document" branch.
  case root && root.name
  when 'ResultSet'
    xml.get_elements('/ResultSet/Result').map(&:text)
  when 'Error'
    messages = xml.get_elements('/Error/Message').map(&:text)
    raise Yahoo::APIError, "#{root.text} #{messages.join(', ')}"
  else
    raise Yahoo::APIError, "Unknown document returned: #{response.body}"
  end
end