Class: IrsPub78::Client
- Inherits:
-
Object
- Object
- IrsPub78::Client
- Defined in:
- lib/irs_pub78/client.rb
Instance Method Summary collapse
-
#download ⇒ Object
Grabs the .zip file from IRS.gov and saves it locally.
- #find(city = "", state_id = "") ⇒ Object
-
#parse_txt_to_json ⇒ Object
Opens the .txt file, reads it, loops through its lines, and writes the data to a hash per line.
-
#process_nodes(nodes = nil) ⇒ Object
Accepts nodes from Nokogiri.
-
#unzip(file, target = "unzipped") ⇒ Object
file = the file you want to unzip; target = the relative directory you want to unzip to.
Instance Method Details
#download ⇒ Object
Grabs the .zip file from IRS.gov and saves it locally.
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/irs_pub78/client.rb', line 39
#
# Downloads the IRS Publication 78 archive and stores it as
# data/eopub78.zip, relative to the current working directory.
#
# If data/eopub78.zip already exists nothing is fetched; a notice is printed
# to standard output instead. The "data" directory is created when missing.
# The URL that was requested is kept in @url.
#
# Relies on open-uri being loaded (the original Kernel#open call on a URL
# depended on it as well).
#
# @return [File, nil] the (closed) File that was written, or nil when the
#   archive was already present
def download
  zip_path = "data/eopub78.zip"

  if File.exist?(zip_path)
    puts "The file has already been downloaded. Delete and try again if you really want the latest."
    return
  end

  @url = "http://www.irs.gov/pub/irs-utl/eopub78.zip"
  # Kernel#open no longer opens URLs as of Ruby 3.0; URI.open is the
  # supported entry point. The block form also closes the handle for us.
  stream = URI.open(@url, &:read)

  directory_name = File.join(Dir.pwd, "data")
  Dir.mkdir(directory_name) unless File.directory?(directory_name)

  File.open(zip_path, "wb") { |f| f << stream }
end
#find(city = "", state_id = "") ⇒ Object
11 12 13 14 15 16 17 |
# File 'lib/irs_pub78/client.rb', line 11
#
# Searches the IRS Pub 78 web application for organizations in a given city
# and state, and returns the rows parsed by #process_nodes.
#
# The requested URL is kept in @url. The city is lower-cased and the state
# upper-cased before being placed in the query string; both are
# percent-encoded.
#
# Relies on open-uri and Nokogiri being loaded by the surrounding file.
#
# @param city [String] city name to search for (required)
# @param state_id [String] state abbreviation to search for (required)
# @return [Object] whatever #process_nodes returns for the result page
# @raise [ArgumentError] when city or state_id is nil, empty or blank
def find(city = "", state_id = "")
  city = city.to_s.strip
  state_id = state_id.to_s.strip
  if city.empty? || state_id.empty?
    raise ArgumentError, "both city and state_id are required"
  end

  # URI.escape was removed in Ruby 3.0; encode_www_form_component is the
  # replacement for query-string values. state_id is now encoded as well,
  # since it is caller-supplied text placed in the URL.
  city_param = URI.encode_www_form_component(city.downcase)
  state_param = URI.encode_www_form_component(state_id.upcase)

  @url = "http://www.irs.gov/app/pub-78/search.do?resultsPerPage=500&nameSearchTypeStarts=false&names=&nameSearchTypeAll=false&city=#{city_param}&state=#{state_param}&country=USA&deductibility=all&sortColumn=name&indexOfFirstRow=0&isDescending=false&dispatchMethod=search"

  # Kernel#open no longer opens URLs as of Ruby 3.0; the block form of
  # URI.open also closes the response handle.
  stream = URI.open(@url, &:read)
  nodes = Nokogiri::HTML(stream)
  process_nodes(nodes)
end
#parse_txt_to_json ⇒ Object
Opens the .txt file, reads it, loops through its lines, and writes the data to a hash per line.
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/irs_pub78/client.rb', line 69
#
# Converts the first data/eopub78/*.txt file into data/irs.json.
#
# Each line is treated as a fixed-width record and turned into a hash with
# the keys :name (columns 0..105), :city (106..136), :state_id (137..138)
# and :code (141). The hashes are serialized as one JSON array. Relies on
# Array#to_json, i.e. the json library being loaded by the surrounding file.
#
# Every line is converted, including the first one.
# NOTE(review): the previous implementation carried a comment saying the
# first header line is ignored, but it never skipped any line; confirm
# whether the input really has a header before changing this.
#
# When the global $debug is truthy the running line count is printed.
#
# @return [File] the (closed) File object for data/irs.json
# @raise [Errno::ENOENT] when no .txt file exists under data/eopub78
def parse_txt_to_json
  file_to_process = Dir.glob("data/eopub78/*.txt").first
  raise Errno::ENOENT, "data/eopub78/*.txt" if file_to_process.nil?

  entities = []
  # File.foreach closes the input when iteration ends; the previous
  # File.open(...).readlines left the handle open.
  File.foreach(file_to_process).with_index(1) do |line, count|
    entities << {
      :name => fixed_width_field(line, 0..105),
      :city => fixed_width_field(line, 106..136),
      :state_id => fixed_width_field(line, 137..138),
      :code => fixed_width_field(line, 141..141)
    }
    puts count if $debug
  end

  File.open("data/irs.json", "w") { |f| f << entities.to_json }
end

# Returns the stripped text found at +range+ in +line+, or "" when the line
# is too short to contain that column range.
def fixed_width_field(line, range)
  line[range].to_s.strip
rescue ArgumentError
  # String#strip rejects invalid byte sequences; keep the previous
  # best-effort behaviour of yielding an empty field in that case.
  ""
end
private :fixed_width_field
#process_nodes(nodes = nil) ⇒ Object
Accepts nodes from Nokogiri.
20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/irs_pub78/client.rb', line 20
#
# Converts the rows of the ".epi-dataTable" table into an array of hashes.
#
# For every "tr" under ".epi-dataTable" one hash is produced with the string
# keys "name", "city", "state_id" and "code", taken from the stripped text
# of the first four "td" cells in that order. A cell that is missing from a
# row (for example in a row without any "td") yields the placeholder string
# 'error' for that key, as before.
#
# @param nodes [#css, nil] a parsed document responding to #css (find passes
#   the result of Nokogiri::HTML); nil is treated as "no rows"
# @return [Array<Hash{String => String}>] one hash per table row
def process_nodes(nodes = nil)
  # The signature advertises nil as the default, so it must not crash.
  return [] if nodes.nil?

  nodes.css(".epi-dataTable tr").map do |row|
    cells = row.css("td") # query once per row instead of once per column
    attrs = {}
    %w[name city state_id code].each_with_index do |key, index|
      cell = cells[index]
      attrs[key] = cell ? cell.text.strip : 'error'
    end
    attrs
  end
end
#unzip(file, target = "unzipped") ⇒ Object
file = the file you want to unzip; target = the relative directory you want to unzip to.
57 58 59 60 61 62 63 64 65 |
# File 'lib/irs_pub78/client.rb', line 57
#
# Extracts every entry of a zip archive into a target directory.
#
# Parent directories are created as needed and entries whose destination
# already exists are left untouched. Entries whose name would resolve to a
# location outside of +target+ (for example names containing "..") are
# skipped with a warning on standard error instead of being written.
#
# @param file [String] path of the zip archive to extract
# @param target [String] directory to extract into
# @return [Object] whatever Zip::ZipFile.open returns for the block
def unzip(file, target = "unzipped")
  root = File.expand_path(target)
  root_prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR

  Zip::ZipFile.open(file) do |zip|
    # The block variable is named entry so it no longer shadows the file
    # parameter.
    zip.each do |entry|
      path = File.join(target, entry.name)

      # Guard against "zip slip": never write outside of the target.
      unless File.expand_path(path).start_with?(root_prefix)
        warn "Skipping zip entry outside of #{target}: #{entry.name}"
        next
      end

      FileUtils.mkdir_p(File.dirname(path))
      zip.extract(entry, path) unless File.exist?(path)
    end
  end
end