Module: Jkl

Defined in:
lib/jkl.rb,
lib/jkl/rss_client.rb,
lib/jkl/rest_client.rb,
lib/jkl/calais_client.rb,
lib/jkl/url_doc_handler.rb,
lib/jkl/persistence_client.rb

Defined Under Namespace

Classes: Trend

Constant Summary collapse

# Value stored under the 'calais' key of config/keys.yml; sent as the
# 'licenseID' form field by get_from_calais.
LICENSE_ID =
YAML::load_file('config/keys.yml')['calais']
# Parsed URI that get_from_calais posts to.
C_URI =
URI.parse('http://api.opencalais.com/enlighten/rest/')
# Result of CouchRest.database! called with the 'db' value of
# config/config.yml; delete_db calls delete! on it.
SERVER =
CouchRest.database! YAML::load_file('config/config.yml')['db']

Instance Method Summary collapse

Instance Method Details

#attribute_from(item, name) ⇒ Object



11
12
13
# File 'lib/jkl/rss_client.rb', line 11

# Looks up +name+ beneath +item+ using the item's `/` operator and returns
# the inner_html of whatever that lookup yields.
def attribute_from(item, name)
  matched = item / name
  matched.inner_html
end

#clean_unwanted_items_from_hash(h) ⇒ Object

jkl does not use these parts of the Calais response ("relevance", "instances", "_typeGroup"), so they are removed, along with blank values ("N/A", empty arrays and empty strings).



36
37
38
39
40
41
42
43
44
# File 'lib/jkl/calais_client.rb', line 36

# Strips, in place, the entries of +h+ that are not wanted:
# the "relevance", "instances" and "_typeGroup" keys, plus any entry whose
# value is "N/A", an empty array or an empty string.
# Returns the same (mutated) hash.
def clean_unwanted_items_from_hash(h)
  unwanted_keys = ["relevance", "instances", "_typeGroup"]
  h.delete_if do |key, value|
    unwanted_keys.include?(key) || value == "N/A" || value == [] || value == ""
  end
  h
end

#delete_db ⇒ Object



7
8
9
# File 'lib/jkl/persistence_client.rb', line 7

# Calls delete! on SERVER and returns its result. Any StandardError raised
# by that call is swallowed and nil is returned instead (best-effort delete).
def delete_db
  begin
    SERVER.delete!
  rescue StandardError
    nil
  end
end

#from_doc(response) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
# File 'lib/jkl/url_doc_handler.rb', line 15

# Passes +response+ to Hpricot() and returns the result.
# If the call raises URI::InvalidURIError, SocketError or
# Errno::ECONNREFUSED, a warning is printed and the return value is that of
# puts (nil). Other exceptions propagate.
def from_doc(response)
  Hpricot(response)
rescue URI::InvalidURIError => error
  puts("WARN: Problem with getting a connection: #{error}")
rescue SocketError => error
  puts("WARN: Could not connect to feed: #{error}")
rescue Errno::ECONNREFUSED => error
  puts("WARN: Connection refused: #{error}")
end

#get_calais_metadata(response) ⇒ Object



29
30
31
32
# File 'lib/jkl/calais_client.rb', line 29

# Builds a CalaisExtractor from +response+ and returns the result of its
# prettify call.
def get_calais_metadata(response)
  extractor = CalaisExtractor.new(response)
  extractor.prettify
end

#get_from(uri) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/jkl/rest_client.rb', line 19

# Parses +uri+, issues Net::HTTP.get_response against it and returns the
# response body.
# On URI::InvalidURIError, SocketError or Errno::ECONNREFUSED a warning is
# printed and the return value is that of puts (nil).
def get_from(uri)
  target = URI.parse(uri)
  Net::HTTP.get_response(target).body
rescue URI::InvalidURIError => error
  puts("WARN: Invalid URI: #{error}")
rescue SocketError => error
  puts("WARN: Could not connect: #{error}")
rescue Errno::ECONNREFUSED => error
  puts("WARN: Connection refused: #{error}")
end

#get_from_as_xml(uri) ⇒ Object



32
33
34
# File 'lib/jkl/rest_client.rb', line 32

# Fetches +uri+ with get_from and hands the result to Hpricot.XML,
# returning whatever Hpricot.XML returns.
def get_from_as_xml(uri)
  body = get_from(uri)
  Hpricot.XML(body)
end

#get_from_calais(content) ⇒ Object



10
11
12
13
14
# File 'lib/jkl/calais_client.rb', line 10

# Posts +content+ to C_URI via post_to and returns post_to's result.
# The form carries the 'licenseID' (LICENSE_ID), the 'content' itself and
# 'paramsXML' built by paramsXML('application/json').
def get_from_calais(content)
  form_fields = {
    'licenseID' => LICENSE_ID,
    'content' => content,
    'paramsXML' => paramsXML('application/json')
  }
  post_to(C_URI, form_fields)
end

#get_items_from(rssdoc) ⇒ Object



5
6
7
8
9
# File 'lib/jkl/rss_client.rb', line 5

# Collects every element yielded by `rssdoc / :item` into a new array.
# Returns an empty array when +rssdoc+ is nil.
def get_items_from(rssdoc)
  collected = []
  return collected if rssdoc == nil

  (rssdoc / :item).each do |entry|
    collected << entry
  end
  collected
end

#get_tag_from_json(response) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/jkl/calais_client.rb', line 16

# Parses +response+ as JSON, ignores the "doc" entry, and runs every
# remaining value through clean_unwanted_items_from_hash.
# Each cleaned value is yielded to the block when one is given.
# Returns the cleaned values as an array.
def get_tag_from_json(response)
  entries = JSON.parse(response).reject { |key, _value| key == "doc" }
  entries.map do |_key, raw_tag|
    cleaned = clean_unwanted_items_from_hash(raw_tag)
    yield cleaned if block_given?
    cleaned
  end
end

#headlines(keyphrase) ⇒ Object



9
10
11
# File 'lib/jkl.rb', line 9

# Builds a feed URL from the 'topix' value of config/config.yml followed by
# the CGI-escaped +keyphrase+, and returns get_from_as_xml for that URL.
# The config file is read on every call.
def headlines(keyphrase)
  feed_prefix = YAML::load_file('config/config.yml')['topix']
  escaped_phrase = CGI::escape(keyphrase)
  get_from_as_xml("#{feed_prefix}#{escaped_phrase}")
end

#pages(headlines) ⇒ Object



13
14
15
16
17
18
19
20
# File 'lib/jkl.rb', line 13

# Concatenates the :description of every item in +headlines+ into a single
# string, with the CDATA opening and closing markers removed.
# Items come from get_items_from; descriptions from attribute_from.
def pages(headlines)
  get_items_from(headlines).each_with_object("") do |item, combined|
    raw = attribute_from(item, :description)
    combined << raw.gsub("<![CDATA[", '').gsub("]]>", '')
  end
end

#post_to(uri, post_args) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/jkl/rest_client.rb', line 6

# Posts +post_args+ as form data to +uri+ (a URI object, as required by
# Net::HTTP.post_form) and returns the response body.
#
# The body is read from the response object directly. The earlier
# `resp, data = ...` destructuring depended on a response-to-array
# conversion that current Ruby versions no longer provide, which left
# `data` (and therefore the return value) nil.
#
# On URI::InvalidURIError, SocketError or Errno::ECONNREFUSED a warning is
# printed and the return value is that of puts (nil).
def post_to(uri, post_args)
  begin
    response = Net::HTTP.post_form(uri, post_args)
    response.body
  rescue  URI::InvalidURIError => e
    puts("WARN: Invalid URI: #{e}")
  rescue SocketError => e
    puts("WARN: Could not connect: #{e}")
  rescue Errno::ECONNREFUSED  => e
    puts("WARN: Connection refused: #{e}")
  end
end

#sanitize(text) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/jkl/url_doc_handler.rb', line 6

# Reduces +text+ (converted with to_s) to its longer lines of plain text.
# Tags are removed first; the remainder is split on "\r" only. Each piece
# is chomped, has tabs and runs of two or more whitespace characters
# removed, and is kept only if it still contains at least five spaces
# (shorter pieces are usually just navigation). Kept pieces are
# concatenated without a separator.
def sanitize(text)
  tagless = text.to_s.gsub(/<\/?[^>]*>/, "")
  tagless.split("\r").each_with_object("") do |raw_line, kept|
    line = raw_line.chomp.gsub("\t", '').gsub(/\s{2,}/, '')
    kept << line if line.count(" ") >= 5
  end
end

#tags(pages) ⇒ Object



22
23
24
25
# File 'lib/jkl.rb', line 22

# Sends +pages+ to Calais via get_from_calais and returns the response
# after it has been processed by get_calais_metadata.
def tags(pages)
  cal_response = get_from_calais(pages)
  get_calais_metadata(cal_response)
end