Class: NPRScraper
- Inherits:
-
Object
- Object
- NPRScraper
- Defined in:
- lib/npr_scraper.rb
Instance Attribute Summary collapse
-
#country_page ⇒ Object
Returns the value of attribute country_page.
-
#home_page ⇒ Object
readonly
Returns the value of attribute home_page.
-
#path_to_country_data ⇒ Object
readonly
Returns the value of attribute path_to_country_data.
-
#path_to_reactor_data ⇒ Object
readonly
Returns the value of attribute path_to_reactor_data.
-
#pris_home ⇒ Object
readonly
Returns the value of attribute pris_home.
-
#reactor_page ⇒ Object
Returns the value of attribute reactor_page.
Instance Method Summary collapse
-
#initialize ⇒ NPRScraper
constructor
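Sets the IAEA home page URL, the PRIS home page URL and the country/reactor detail paths. The former defaults (country FI, reactor LOVIISA-1) are commented out in the constructor body – maybe remove them?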
- #scrape_available_countries ⇒ Object
- #scrape_available_reactors ⇒ Object
- #scrape_country_data(country_iso) ⇒ Object
- #scrape_reactor_data(reactor_id) ⇒ Object
Constructor Details
#initialize ⇒ NPRScraper
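Sets the IAEA home page URL, the PRIS home page URL and the country/reactor detail paths. The former defaults (country FI, reactor LOVIISA-1) are commented out in the constructor body – maybe remove them?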
9 10 11 12 13 14 15 16 |
# File 'lib/npr_scraper.rb', line 9
#
# Stores the URL pieces the scraper methods combine into full page addresses.
#
# No default country or reactor page is assigned: the two assignments that
# used to do so (country FI, reactor 157) are kept below, commented out.
def initialize
  # Relative paths; a country code / reactor id is appended to build the detail URL.
  @path_to_country_data = "/PRIS/CountryStatistics/CountryDetails.aspx?current="
  @path_to_reactor_data = "/PRIS/CountryStatistics/ReactorDetails.aspx?current="

  @home_page = "https://www.iaea.org"
  # Page to draw the available countries and reactors from, together with their codes.
  @pris_home = "#{@home_page}/PRIS/home.aspx"

  # @country_page = "#{@home_page}#{@path_to_country_data}FI"
  # @reactor_page = "#{@home_page}#{@path_to_reactor_data}157"
end
Instance Attribute Details
#country_page ⇒ Object
Returns the value of attribute country_page.
6 7 8 |
# File 'lib/npr_scraper.rb', line 6 def country_page @country_page end |
#home_page ⇒ Object (readonly)
Returns the value of attribute home_page.
7 8 9 |
# File 'lib/npr_scraper.rb', line 7 def home_page @home_page end |
#path_to_country_data ⇒ Object (readonly)
Returns the value of attribute path_to_country_data.
7 8 9 |
# File 'lib/npr_scraper.rb', line 7 def path_to_country_data @path_to_country_data end |
#path_to_reactor_data ⇒ Object (readonly)
Returns the value of attribute path_to_reactor_data.
7 8 9 |
# File 'lib/npr_scraper.rb', line 7 def path_to_reactor_data @path_to_reactor_data end |
#pris_home ⇒ Object (readonly)
Returns the value of attribute pris_home.
7 8 9 |
# File 'lib/npr_scraper.rb', line 7 def pris_home @pris_home end |
#reactor_page ⇒ Object
Returns the value of attribute reactor_page.
6 7 8 |
# File 'lib/npr_scraper.rb', line 6 def reactor_page @reactor_page end |
Instance Method Details
#scrape_available_countries ⇒ Object
18 19 20 21 22 23 24 25 26 |
# File 'lib/npr_scraper.rb', line 18
#
# Scrapes the PRIS home page (@pris_home) and lists the entries of its country
# drop-down (#MainContent_ddlCountry inside .box-content.shortCutBox).
#
# @return [Hash{String => String}] option value => option text, i.e.
#   {code1 => country1_name, code2 => country2_name, ...}; options whose text
#   is empty are skipped
def scrape_available_countries
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open for
  # URLs in Ruby 3.0. The block form closes the handle once the page is parsed.
  page = URI.open(@pris_home) { |io| Nokogiri::HTML(io) }
  options = page.css(".box-content.shortCutBox #MainContent_ddlCountry option")

  options.each_with_object({}) do |option, countries|
    next if option.text.empty?

    # Look the attribute up by name: the first attribute of an <option> is not
    # guaranteed to be "value" (e.g. when "selected" is written before it).
    countries[option["value"]] = option.text
  end
end
#scrape_available_reactors ⇒ Object
28 29 30 31 32 33 34 35 36 |
# File 'lib/npr_scraper.rb', line 28
#
# Scrapes the PRIS home page (@pris_home) and lists the entries of its reactor
# drop-down (#MainContent_ddlReactors inside .box-content.shortCutBox).
#
# @return [Hash{String => String}] option value => option text, i.e.
#   {id1 => reactor1_name, id2 => reactor2_name, ...}; options whose text is
#   empty are skipped
def scrape_available_reactors
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open for
  # URLs in Ruby 3.0. The block form closes the handle once the page is parsed.
  page = URI.open(@pris_home) { |io| Nokogiri::HTML(io) }
  options = page.css(".box-content.shortCutBox #MainContent_ddlReactors option")

  options.each_with_object({}) do |option, reactors|
    next if option.text.empty?

    # Look the attribute up by name: the first attribute of an <option> is not
    # guaranteed to be "value" (e.g. when "selected" is written before it).
    reactors[option["value"]] = option.text
  end
end
#scrape_country_data(country_iso) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/npr_scraper.rb', line 38
#
# Scrapes the PRIS country page for +country_iso+ and returns its summary
# figures together with the names of the reactors listed on that page.
# Also stores the fetched URL in @country_page.
#
# @param country_iso [String] country code appended to @path_to_country_data
# @return [Hash{Symbol => Object}] with
#   :iso             - the code passed in
#   one entry per summary label (key built from the label's first one or two
#                      words, e.g. :nuclear_electricity), value = the stripped
#                      text of the <h2> at the same position, or nil if absent
#   :nuclear_e_share - "NN.NN%" share of nuclear in total electricity, or nil
#                      when either figure is missing or the total is zero
#   :reactors        - Array of link texts from the .tablesorter table
def scrape_country_data(country_iso)
  @country_page = "#{@home_page}#{@path_to_country_data}#{country_iso}"
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open for
  # URLs in Ruby 3.0. The block form closes the handle once the page is parsed.
  page = URI.open(@country_page) { |io| Nokogiri::HTML(io) }

  labels = page.css(".box-content td label")
  figures = page.css(".box-content td h2")

  country = { iso: country_iso }
  # Labels and figures are paired purely by position on the page.
  labels.each_with_index do |label, index|
    key = summary_key_for(label.text)
    next unless key

    figure = figures[index]
    country[key] = figure ? figure.text.strip : nil
  end

  country[:nuclear_e_share] = nuclear_share_of(country[:nuclear_electricity], country[:total_electricity])
  country[:reactors] = page.css(".tablesorter td a").map(&:text)
  country
end

# Turns a summary label into a hash key: its first one or two words,
# lower-cased and joined with "_". Returns nil when the label has no word.
def summary_key_for(label_text)
  # Non-bang strip/downcase: the bang variants return nil when nothing changes.
  match = label_text.strip.downcase.match(/\b"?(\w+)\-?\s?(\w+)?\b/)
  return nil unless match

  match.captures.compact.join("_").to_sym
end

# Formats nuclear/total (both "<number> GW.h" strings) as a percentage string.
# Returns nil when a figure is missing or the total is zero.
def nuclear_share_of(nuclear, total)
  return nil unless nuclear && total

  # NOTE(review): to_f stops at the first non-numeric character, so a figure
  # with thousands separators would be truncated - confirm the PRIS format.
  nep = nuclear.gsub(/\sGW\.h/, "").to_f
  tep = total.gsub(/\sGW\.h/, "").to_f
  return nil if tep.zero?

  "#{((nep / tep) * 100).round(2)}%"
end

private :summary_key_for, :nuclear_share_of
#scrape_reactor_data(reactor_id) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/npr_scraper.rb', line 68
#
# Scrapes the PRIS reactor page for +reactor_id+ and returns its data.
# Also stores the fetched URL in @reactor_page.
#
# @param reactor_id [String, Integer] id appended to @path_to_reactor_data
# @return [Hash{Symbol => String}] with
#   :location - stripped text of #MainContent_litCaption inside .sidebar
#               (empty string when the element is not found)
#   :status   - text of #MainContent_MainContent_lblReactorStatus
#   one entry per <span> inside .box-content whose id contains
#   "MainContent_MainContent_lbl<Name>": key :<Name>, value = stripped text
def scrape_reactor_data(reactor_id)
  @reactor_page = "#{@home_page}#{@path_to_reactor_data}#{reactor_id}"
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open for
  # URLs in Ruby 3.0. The block form closes the handle once the page is parsed.
  page = URI.open(@reactor_page) { |io| Nokogiri::HTML(io) }

  reactor = {
    # Non-bang strip: strip! returns nil when there is nothing to strip.
    location: page.css(".sidebar #MainContent_litCaption").text.strip,
    status: page.css("#MainContent_MainContent_lblReactorStatus").text
  }

  # Add the rest of the data, keyed by the suffix of each span's id.
  page.css(".box-content span").each_with_object(reactor) do |span, data|
    # Read the id by name and skip spans without a matching id instead of
    # crashing on them.
    match = span["id"].to_s.match(/MainContent_MainContent_lbl(\w+)/)
    next unless match

    data[match[1].to_sym] = span.text.strip
  end
end