Module: Jkl
- Defined in:
- lib/jkl.rb,
lib/jkl/rss_client.rb,
lib/jkl/rest_client.rb,
lib/jkl/calais_client.rb,
lib/jkl/url_doc_handler.rb,
lib/jkl/persistence_client.rb
Defined Under Namespace
Classes: Trend
Constant Summary
- LICENSE_ID =
YAML::load_file('config/keys.yml')['calais']
- C_URI =
URI.parse('http://api.opencalais.com/enlighten/rest/')
- SERVER =
CouchRest.database! YAML::load_file('config/config.yml')['db']
Instance Method Summary
- #attribute_from(item, name) ⇒ Object
-
#clean_unwanted_items_from_hash(h) ⇒ Object
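Removes the parts of a Calais response hash that jkl does not use (the "relevance", "instances" and "_typeGroup" keys) and drops blank values ("N/A", empty arrays and empty strings).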
- #delete_db ⇒ Object
- #from_doc(response) ⇒ Object
- #get_calais_metadata(response) ⇒ Object
- #get_from(uri) ⇒ Object
- #get_from_as_xml(uri) ⇒ Object
- #get_from_calais(content) ⇒ Object
- #get_items_from(rssdoc) ⇒ Object
- #get_tag_from_json(response) ⇒ Object
- #headlines(keyphrase) ⇒ Object
- #pages(headlines) ⇒ Object
- #post_to(uri, post_args) ⇒ Object
- #sanitize(text) ⇒ Object
- #tags(pages) ⇒ Object
Instance Method Details
#attribute_from(item, name) ⇒ Object
11 12 13 |
# File 'lib/jkl/rss_client.rb', line 11

# Returns the inner HTML of whatever +item+ yields for the selector +name+.
#
# @param item [#/] node that supports the `/` search operator
#   (presumably an Hpricot element -- confirm against the caller)
# @param name [Symbol, String] selector handed to `item / name`
# @return [Object] the result of calling `inner_html` on the match
def attribute_from(item, name)
  (item / name).inner_html
end
#clean_unwanted_items_from_hash(h) ⇒ Object
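Removes the parts of a Calais response hash that jkl does not use (the "relevance", "instances" and "_typeGroup" keys) and drops blank values ("N/A", empty arrays and empty strings).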
36 37 38 39 40 41 42 43 44 |
# File 'lib/jkl/calais_client.rb', line 36

# Strips, in place, the entries of a Calais tag hash that jkl does not use.
#
# An entry is removed when its key is "relevance", "instances" or
# "_typeGroup", or when its value is blank ("N/A", an empty array or an
# empty string).
#
# @param h [Hash] hash to clean; it is mutated
# @return [Hash] the same hash object, after cleaning
def clean_unwanted_items_from_hash(h)
  unwanted_keys = %w[relevance instances _typeGroup]
  # One pass instead of six separate delete_if sweeps over the hash.
  h.delete_if do |key, value|
    unwanted_keys.include?(key) || value == "N/A" || value == [] || value == ""
  end
  h
end
#delete_db ⇒ Object
7 8 9 |
# File 'lib/jkl/persistence_client.rb', line 7

# Calls `SERVER.delete!` on a best-effort basis.
#
# Any StandardError raised by the call is swallowed, exactly as the former
# inline `rescue nil` modifier did; the rescue is now explicit so the
# deliberate swallow is visible.
#
# @return [Object, nil] the result of `SERVER.delete!`, or nil if it raised
def delete_db
  SERVER.delete!
rescue StandardError
  nil
end
#from_doc(response) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/jkl/url_doc_handler.rb', line 15

# Passes +response+ to `Hpricot(...)` and returns the result.
#
# URI::InvalidURIError, SocketError and Errno::ECONNREFUSED are caught and
# reported with a "WARN:" line via `puts`; in those cases the method returns
# the value of `puts` (nil).
#
# @param response [Object] input handed to `Hpricot(...)`
# @return [Object, nil] the value returned by `Hpricot(...)`, or nil after a
#   rescued error
def from_doc(response)
  Hpricot(response)
rescue URI::InvalidURIError => e
  puts("WARN: Problem with getting a connection: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect to feed: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
#get_calais_metadata(response) ⇒ Object
29 30 31 32 |
# File 'lib/jkl/calais_client.rb', line 29

# Wraps +response+ in a CalaisExtractor and returns its `prettify` result.
#
# The method name is restored from the section heading; the flattened
# listing had lost it (`def (response)`).
#
# @param response [Object] value passed to `CalaisExtractor.new`
# @return [Object] whatever `CalaisExtractor#prettify` returns
def get_calais_metadata(response)
  CalaisExtractor.new(response).prettify
end
#get_from(uri) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/jkl/rest_client.rb', line 19

# Performs an HTTP GET on +uri+ and returns the response body.
#
# URI::InvalidURIError, SocketError and Errno::ECONNREFUSED are caught and
# reported with a "WARN:" line via `puts`; in those cases the method returns
# the value of `puts` (nil).
#
# @param uri [String] address to fetch; parsed with `URI.parse`
# @return [String, nil] the response body, or nil after a rescued error
def get_from(uri)
  Net::HTTP.get_response(URI.parse(uri)).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
#get_from_as_xml(uri) ⇒ Object
32 33 34 |
# File 'lib/jkl/rest_client.rb', line 32

# Fetches +uri+ with `get_from` and hands the result to `Hpricot.XML`.
#
# @param uri [String] address passed to `get_from`
# @return [Object] whatever `Hpricot.XML` returns
def get_from_as_xml(uri)
  Hpricot.XML(get_from(uri))
end
#get_from_calais(content) ⇒ Object
10 11 12 13 14 |
# File 'lib/jkl/calais_client.rb', line 10

# Posts +content+ to the Calais endpoint (C_URI) via `post_to`.
#
# The form carries the licence key (LICENSE_ID), the content itself and the
# value of `paramsXML('application/json')`.
#
# @param content [Object] value sent as the 'content' form field
# @return [Object] whatever `post_to` returns
def get_from_calais(content)
  post_args = {
    'licenseID' => LICENSE_ID,
    'content' => content,
    'paramsXML' => paramsXML('application/json')
  }
  post_to(C_URI, post_args)
end
#get_items_from(rssdoc) ⇒ Object
5 6 7 8 9 |
# File 'lib/jkl/rss_client.rb', line 5

# Collects the results of `rssdoc / :item` into a plain Array.
#
# @param rssdoc [#/, nil] document that supports the `/` search operator
#   (presumably an Hpricot XML document -- confirm against the caller)
# @return [Array] the matched items in document order; empty when +rssdoc+
#   is nil
def get_items_from(rssdoc)
  return [] if rssdoc.nil?

  (rssdoc / :item).each_with_object([]) { |rssitem, items| items << rssitem }
end
#get_tag_from_json(response) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/jkl/calais_client.rb', line 16

# Parses a JSON response and returns its cleaned entries.
#
# The "doc" entry is discarded; every remaining value is passed through
# `clean_unwanted_items_from_hash`. When a block is given, each cleaned
# entry is yielded to it in order.
#
# @param response [String] JSON text
# @yieldparam tag [Object] one cleaned entry
# @return [Array] the cleaned entries, in the order they appear in the JSON
def get_tag_from_json(response)
  entries = JSON.parse(response).reject { |key, _value| key == "doc" } # ditching the doc
  entries.map do |_key, tag|
    cleaned = clean_unwanted_items_from_hash(tag)
    yield cleaned if block_given?
    cleaned
  end
end
#headlines(keyphrase) ⇒ Object
9 10 11 |
# File 'lib/jkl.rb', line 9

# Fetches the feed for +keyphrase+ as XML.
#
# The URL is the 'topix' entry of config/config.yml followed by the
# CGI-escaped keyphrase. The config file is read on every call.
#
# @param keyphrase [String] search phrase appended to the configured URL
# @return [Object] whatever `get_from_as_xml` returns
def headlines(keyphrase)
  base_url = YAML.load_file('config/config.yml')['topix']
  get_from_as_xml("#{base_url}#{CGI.escape(keyphrase)}")
end
#pages(headlines) ⇒ Object
13 14 15 16 17 18 19 20 |
# File 'lib/jkl.rb', line 13

# Concatenates the description of every item in +headlines+.
#
# Items come from `get_items_from`; each description is read with
# `attribute_from(item, :description)` and has its "<![CDATA[" and "]]>"
# markers removed before being appended. No separator is inserted.
#
# @param headlines [Object] document passed to `get_items_from`
# @return [String] all descriptions joined together ("" when there are none)
def pages(headlines)
  get_items_from(headlines).each_with_object(+'') do |item, descriptions|
    descriptions << attribute_from(item, :description).gsub("<![CDATA[", '').gsub("]]>", '')
  end
end
#post_to(uri, post_args) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/jkl/rest_client.rb', line 6

# Submits +post_args+ as an HTTP form POST to +uri+ and returns the
# response body.
#
# The original `resp, data = Net::HTTP.post_form(...)` relied on the
# response being splattable into [response, body]. post_form returns a
# single response object, so on Rubies where that object has no `to_ary`
# `data` was always nil. The body is now read from the response directly.
#
# URI::InvalidURIError, SocketError and Errno::ECONNREFUSED are caught and
# reported with a "WARN:" line via `puts`; in those cases the method returns
# the value of `puts` (nil).
#
# @param uri [URI] target of the POST (passed to `Net::HTTP.post_form` as is)
# @param post_args [Hash] form fields
# @return [String, nil] the response body, or nil after a rescued error
def post_to(uri, post_args)
  Net::HTTP.post_form(uri, post_args).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
#sanitize(text) ⇒ Object
6 7 8 9 10 11 12 13 |
# File 'lib/jkl/url_doc_handler.rb', line 6

# Reduces marked-up text to its longer lines of plain text.
#
# Tags are removed, the text is split on carriage returns, and every piece
# has its tabs and its runs of two or more whitespace characters deleted.
# Pieces with fewer than five spaces are dropped; the rest are concatenated
# without a separator.
#
# NOTE(review): the split is on "\r" only, so newline-delimited input is
# treated as a single piece -- confirm this is intended.
#
# @param text [#to_s] text to clean; nil is treated as ""
# @return [String] the retained pieces joined together
def sanitize(text)
  without_tags = text.to_s.gsub(%r{</?[^>]*>}, "") # remove tags
  without_tags.split("\r").each_with_object(+'') do |piece, kept|
    line = piece.chomp.delete("\t").gsub(/\s{2,}/, '') # remove tabs and larger spaces
    kept << line unless line.count(" ") < 5 # remove short lines - usually just navigation
  end
end
#tags(pages) ⇒ Object
22 23 24 25 |
# File 'lib/jkl.rb', line 22

# Sends +pages+ to Calais via `get_from_calais`.
#
# The method name is restored from the section heading; the flattened
# listing had lost it (`def (pages)`).
#
# @param pages [Object] content passed to `get_from_calais`
# @return [Object] see the review note in the body
def tags(pages)
  cal_response = get_from_calais(pages)
  # NOTE(review): the listing shows only "(cal_response)" here. The name of a
  # call wrapping cal_response appears to have been lost along with the method
  # name (possibly get_calais_metadata or get_tag_from_json) -- restore it
  # from lib/jkl.rb before relying on this return value.
  (cal_response)
end