Class: NPRScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/npr_scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ NPRScraper

Sets up the IAEA/PRIS base URL and paths. The former defaults (country FI, reactor LOVIISA-1) are commented out in the code – maybe remove them entirely?



9
10
11
12
13
14
15
16
# File 'lib/npr_scraper.rb', line 9

# Prepares the fixed IAEA/PRIS addresses that the scrape methods build on.
#
# No country or reactor page is preselected: the former defaults
# (country "FI", reactor id 157 / LOVIISA-1) are disabled, and the page
# URLs are assembled by scrape_country_data / scrape_reactor_data instead.
def initialize
  @home_page = "https://www.iaea.org"
  # Landing page whose drop-downs list the available countries and
  # reactors together with their codes.
  @pris_home = @home_page + "/PRIS/home.aspx"
  # Path prefixes; a country code or reactor id is appended at the end.
  @path_to_country_data = "/PRIS/CountryStatistics/CountryDetails.aspx?current="
  @path_to_reactor_data = "/PRIS/CountryStatistics/ReactorDetails.aspx?current="
end

Instance Attribute Details

#country_page ⇒ Object

Returns the value of attribute country_page.



6
7
8
# File 'lib/npr_scraper.rb', line 6

# URL of the country page last built by scrape_country_data.
# initialize does not assign it, so it is nil until something sets it.
def country_page
  @country_page
end

#home_page ⇒ Object (readonly)

Returns the value of attribute home_page.



7
8
9
# File 'lib/npr_scraper.rb', line 7

# Base URL of the IAEA site ("https://www.iaea.org"), assigned in initialize.
def home_page
  @home_page
end

#path_to_country_data ⇒ Object (readonly)

Returns the value of attribute path_to_country_data.



7
8
9
# File 'lib/npr_scraper.rb', line 7

# Path prefix of the PRIS country-details page, assigned in initialize;
# scrape_country_data appends the country code to it.
def path_to_country_data
  @path_to_country_data
end

#path_to_reactor_data ⇒ Object (readonly)

Returns the value of attribute path_to_reactor_data.



7
8
9
# File 'lib/npr_scraper.rb', line 7

# Path prefix of the PRIS reactor-details page, assigned in initialize;
# scrape_reactor_data appends the reactor id to it.
def path_to_reactor_data
  @path_to_reactor_data
end

#pris_home ⇒ Object (readonly)

Returns the value of attribute pris_home.



7
8
9
# File 'lib/npr_scraper.rb', line 7

# URL of the PRIS home page, assigned in initialize; the
# scrape_available_* methods read their option lists from it.
def pris_home
  @pris_home
end

#reactor_page ⇒ Object

Returns the value of attribute reactor_page.



6
7
8
# File 'lib/npr_scraper.rb', line 6

# URL of the reactor page last built by scrape_reactor_data.
# initialize does not assign it, so it is nil until something sets it.
def reactor_page
  @reactor_page
end

Instance Method Details

#scrape_available_countries ⇒ Object



18
19
20
21
22
23
24
25
26
# File 'lib/npr_scraper.rb', line 18

# Scrapes the PRIS home page and returns the entries of its country
# drop-down (#MainContent_ddlCountry).
#
# @return [Hash{String => String}] each option's +value+ attribute
#   (presumably the country's ISO code) mapped to the option's visible
#   text (the country name); options with empty text are left out
def scrape_available_countries
  # URI.open instead of Kernel#open: opening URLs through Kernel#open was
  # deprecated in Ruby 2.7 and removed in 3.0 (open-uri must be loaded).
  page = Nokogiri::HTML(URI.open(@pris_home))
  options = page.css(".box-content.shortCutBox").css("#MainContent_ddlCountry").css("option")
  options.each_with_object({}) do |option, countries|
    next if option.text == "" # blank placeholder entry of the drop-down

    # Read the attribute by name; taking the first attribute value breaks
    # on markup such as <option selected="selected" value="FI">.
    countries[option["value"]] = option.text
  end
end

#scrape_available_reactors ⇒ Object



28
29
30
31
32
33
34
35
36
# File 'lib/npr_scraper.rb', line 28

# Scrapes the PRIS home page and returns the entries of its reactor
# drop-down (#MainContent_ddlReactors).
#
# @return [Hash{String => String}] each option's +value+ attribute
#   (presumably the reactor id) mapped to the option's visible text
#   (the reactor name); options with empty text are left out
def scrape_available_reactors
  # URI.open instead of Kernel#open: opening URLs through Kernel#open was
  # deprecated in Ruby 2.7 and removed in 3.0 (open-uri must be loaded).
  page = Nokogiri::HTML(URI.open(@pris_home))
  options = page.css(".box-content.shortCutBox").css("#MainContent_ddlReactors").css("option")
  options.each_with_object({}) do |option, reactors|
    next if option.text == "" # blank placeholder entry of the drop-down

    # Read the attribute by name; taking the first attribute value breaks
    # on markup such as <option selected="selected" value="157">.
    reactors[option["value"]] = option.text
  end
end

#scrape_country_data(country_iso) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/npr_scraper.rb', line 38

# Scrapes the PRIS country page for +country_iso+ and returns its summary
# figures together with the names of the reactors listed on the page.
#
# Side effect: stores the fetched URL in @country_page.
#
# @param country_iso [String] code appended to the country-details URL
# @return [Hash] with these entries:
#   * :iso - the +country_iso+ argument
#   * one key per summary label, built from the label's first one or two
#     words, lower-cased and joined with "_" (e.g. :total_electricity);
#     the value is the stripped text of the <h2> at the same position
#     (nil when there is no such element)
#   * :nuclear_e_share - nuclear / total electricity as "NN.NN%", or nil
#     when either figure is missing or the total is zero
#   * :reactors - Array of reactor names (link texts of the .tablesorter table)
def scrape_country_data(country_iso)
  @country_page = "#{@home_page}#{@path_to_country_data}#{country_iso}"
  # URI.open instead of Kernel#open: opening URLs through Kernel#open was
  # deprecated in Ruby 2.7 and removed in 3.0 (open-uri must be loaded).
  page = Nokogiri::HTML(URI.open(@country_page))

  summary_cells = page.css(".box-content").css("td")
  labels = summary_cells.css("label")
  values = summary_cells.css("h2")
  reactor_links = page.css(".tablesorter").css("td").css("a")

  country = { iso: country_iso }
  labels.each_with_index do |label, i|
    # Non-bang strip/downcase on purpose: strip! and downcase! return nil
    # when the string is already clean, which broke the original chain.
    words = label.text.strip.downcase.match(/\b"?(\w+)\-?\s?(\w+)?\b/)
    next unless words # label without any word characters

    value = values[i]
    # The second capture is nil for one-word labels; compact drops it.
    country[words.captures.compact.join("_").to_sym] = value && value.text.strip
  end

  # Share of nuclear power in the total electricity produced.
  nuclear, total = [:nuclear_electricity, :total_electricity].map do |key|
    figure = country[key]
    figure && figure.gsub(/\sGW\.h/, "").to_f
  end
  country[:nuclear_e_share] =
    if nuclear && total && !total.zero?
      "#{((nuclear / total) * 100).round(2)}%"
    end

  country[:reactors] = reactor_links.map(&:text)
  country
end

#scrape_reactor_data(reactor_id) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/npr_scraper.rb', line 68

# Scrapes the PRIS reactor page for +reactor_id+ and returns its details.
#
# Side effect: stores the fetched URL in @reactor_page.
#
# @param reactor_id [String, Integer] id appended to the reactor-details URL
# @return [Hash] with these entries:
#   * :location - stripped text of #MainContent_litCaption in the sidebar
#     (named country_name in the original code)
#   * :status - text of #MainContent_MainContent_lblReactorStatus
#   * one key per <span> inside .box-content whose id contains
#     "MainContent_MainContent_lbl"; the key is the rest of the id as a
#     Symbol (e.g. :Type) and the value is the span's stripped text.
#     Spans without such an id are skipped.
def scrape_reactor_data(reactor_id)
  @reactor_page = "#{@home_page}#{@path_to_reactor_data}#{reactor_id}"
  # URI.open instead of Kernel#open: opening URLs through Kernel#open was
  # deprecated in Ruby 2.7 and removed in 3.0 (open-uri must be loaded).
  page = Nokogiri::HTML(URI.open(@reactor_page))

  reactor = {
    # strip, not strip!: the bang form returns nil for already-trimmed text
    location: page.css(".sidebar").css("#MainContent_litCaption").text.strip,
    status: page.css("#MainContent_MainContent_lblReactorStatus").text
  }

  page.css(".box-content").css("span").each do |span|
    # Look the id up by name and skip spans that do not carry a label id,
    # instead of raising on the first unrelated <span>.
    label = span["id"].to_s[/MainContent_MainContent_lbl(\w*)/, 1]
    next unless label

    reactor[label.to_sym] = span.text.strip
  end
  reactor
end