Class: Securities::Scraper
- Inherits: Object
- Inheritance hierarchy: Object → Securities::Scraper
- Defined in:
- lib/securities/scraper.rb
Overview
Main class to communicate with Yahoo! Finance
Defined Under Namespace
Classes: ScraperException
Class Method Summary
Class Method Details
.get(type, url) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/securities/scraper.rb', line 16

# Downloads +url+ and converts the response body into an array of hashes.
#
# @param type [Symbol] +:lookup+ parses the body as HTML and reads the rows of
#   the +div#yfi_sym_results tbody+ table; +:history+ parses the body as CSV
#   whose first line holds the column headers. Any other value skips parsing
#   and yields an empty array.
# @param url [String] address to fetch; characters that are not valid in a
#   URI are percent-escaped before the request is made.
# @return [Array<Hash>] one hash per table row (+:lookup+) or per CSV line
#   (+:history+, in reverse of the order they appear in the response).
# @raise [ScraperException] when the request fails, the lookup table is
#   missing, the CSV cannot be parsed, or the CSV has no data lines.
def self.get(type, url)
  results = []

  # Encoding for bad characters in index names.
  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser's #escape is the
  # implementation it used to delegate to. Older Rubies that predate the
  # RFC2396_Parser constant expose the same method on DEFAULT_PARSER.
  escaper = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI::DEFAULT_PARSER
  uri = URI.parse(escaper.escape(url))

  # Check connection.
  begin
    body = Net::HTTP.get(uri)
  rescue => error
    raise ScraperException, "Connection error: #{error.message}"
  end

  case type
  when :lookup
    # Scraping for lookup.
    table = Nokogiri::HTML(body).at('div#yfi_sym_results tbody')
    raise ScraperException, 'There were no results for this lookup.' if table.nil?

    # Columns: Symbol, Name, Last Trade, Type, Industry/Category, Exchange.
    table.xpath('tr').each do |tr|
      row = tr.xpath('td')
      results << { :symbol            => row[0].text,
                   :name              => row[1].text,
                   :last_trade        => row[2].text,
                   :type              => row[3].text,
                   :industry_category => row[4].text,
                   :exchange          => row[5].text }
    end
  when :history
    # Scraping for history.
    # The first line is consumed as headers (e.g. Date,Open,High,Low,Close,
    # Volume,Adj Close) rather than returned as data.
    begin
      csv = CSV.parse(body, :headers => true)
    rescue StandardError
      # Probably an invalid symbol specified or there was some other way the
      # parser couldn't read a CSV.
      raise ScraperException, 'Stock symbol does not exist.'
    end

    # Header names become the hash keys. They are converted once, and only
    # when there is at least one data row, so an empty table goes straight to
    # the "no results" error below.
    # NOTE(review): String#parameterize / #underscore are not core Ruby;
    # presumably supplied by ActiveSupport, which the file must load.
    keys = nil
    results = csv.map do |row|
      keys ||= csv.headers.map { |header| header.parameterize.underscore.to_sym }
      keys.each_with_index.each_with_object({}) do |(key, i), line|
        line[key] = row[i]
      end
    end

    raise ScraperException, 'There were no results for this symbol.' if results.empty?

    # Reversing results to return from the oldest to the newest
    # (assumes the feed lists the newest entry first).
    results = results.reverse
  end

  results
end