Class: IrsPub78::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/irs_pub78/client.rb

Instance Method Summary collapse

Instance Method Details

#download ⇒ Object

Grabs the .zip file from IRS.gov and saves it locally.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/irs_pub78/client.rb', line 39

# Downloads the Publication 78 archive from IRS.gov to data/eopub78.zip.
#
# When data/eopub78.zip already exists nothing is fetched; a notice is printed
# instead. Otherwise the archive is fetched, the data/ directory is created if
# it is missing, and the bytes are written in binary mode.
#
# Side effect: sets @url to the archive address when a download is attempted.
def download
  if File.exist?("data/eopub78.zip")
    puts "The file has already been downloaded.  Delete and try again if you really want the latest."
    return
  end

  @url = "http://www.irs.gov/pub/irs-utl/eopub78.zip"
  # URI#read (provided by open-uri) fetches the body and closes the underlying
  # stream. A bare Kernel#open on a URL string leaves the stream open and is
  # not supported on Ruby 3.0+.
  stream = URI.parse(@url).read

  Dir.mkdir("data") unless File.directory?("data")
  File.open("data/eopub78.zip", "wb") { |f| f << stream }
end

#find(city = "", state_id = "") ⇒ Object

Raises:

  • (ArgumentError)


11
12
13
14
15
16
17
# File 'lib/irs_pub78/client.rb', line 11

# Queries the IRS Publication 78 search form for a city and state and returns
# the parsed result rows.
#
# @param city [String] city name; lower-cased and form-encoded for the query
# @param state_id [String] state abbreviation; upper-cased and form-encoded
# @return [Object] the value returned by process_nodes for the fetched page
# @raise [ArgumentError] if either argument is nil or empty
#
# Side effect: sets @url to the search address that was requested.
def find(city = "", state_id = "")
  raise ArgumentError, "city and state_id are both required" if city.to_s.empty? || state_id.to_s.empty?

  # An ordered list of pairs keeps the parameter order of the search form while
  # URI.encode_www_form escapes every value (URI.escape is obsolete and was
  # removed in Ruby 3.0; state_id used to be interpolated unescaped).
  query = URI.encode_www_form(
    [
      ["resultsPerPage", 500],
      ["nameSearchTypeStarts", false],
      ["names", ""],
      ["nameSearchTypeAll", false],
      ["city", city.to_s.downcase],
      ["state", state_id.to_s.upcase],
      ["country", "USA"],
      ["deductibility", "all"],
      ["sortColumn", "name"],
      ["indexOfFirstRow", 0],
      ["isDescending", false],
      ["dispatchMethod", "search"]
    ]
  )
  @url = "http://www.irs.gov/app/pub-78/search.do?#{query}"

  # URI#read (open-uri) fetches the body and closes the stream afterwards.
  stream = URI.parse(@url).read
  nodes  = Nokogiri::HTML(stream)
  process_nodes(nodes)
end

#parse_txt_to_json ⇒ Object

Opens the .txt file, reads it, loops through its lines, and writes the data to an array of hashes that is saved as JSON.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/irs_pub78/client.rb', line 69

# Converts the first .txt file found under data/eopub78/ into data/irs.json.
#
# Each line is treated as a fixed-width record and sliced into four fields:
# name (columns 0-105), city (106-136), state_id (137-138) and code (141).
# Fields that fall beyond the end of a short line become empty strings.
#
# NOTE: every line is parsed, including the first one; no header line is
# skipped.
#
# When the global $debug is truthy, the running 1-based line count is printed.
def parse_txt_to_json
  file_to_process = Dir.glob("data/eopub78/*.txt").first

  # Slicing past the end of a line yields nil, so to_s maps it to "" without
  # resorting to a catch-all inline rescue.
  column = lambda { |line, range| line[range].to_s.strip }

  # File.readlines closes the file once it has been read.
  entities = File.readlines(file_to_process).each_with_index.map do |line, index|
    puts index + 1 if $debug
    {
      :name     => column.call(line, 0..105),
      :city     => column.call(line, 106..136),
      :state_id => column.call(line, 137..138),
      :code     => column.call(line, 141..141)
    }
  end

  File.open("data/irs.json", "w") { |f| f << entities.to_json }
end

#process_nodes(nodes = nil) ⇒ Object

Accepts a document parsed by Nokogiri and returns one hash per table row.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/irs_pub78/client.rb', line 20

# Turns the rows of the ".epi-dataTable" table into an array of hashes.
#
# Every row matched by ".epi-dataTable tr" yields one hash with the string
# keys "name", "city", "state_id" and "code", taken from the stripped text of
# the row's 1st to 4th "td" cells. If reading a cell fails for any reason
# (for instance the cell is absent), that key is set to 'error'.
#
# @param nodes [#css] parsed document to search (e.g. from Nokogiri::HTML)
# @return [Array<Hash>] one hash per matched row, in document order
def process_nodes(nodes = nil)
  cell_positions = { "name" => 0, "city" => 1, "state_id" => 2, "code" => 3 }

  nodes.css(".epi-dataTable tr").map do |row|
    cell_positions.each_with_object({}) do |(key, position), record|
      record[key] = begin
        row.css("td")[position].text.strip
      rescue StandardError
        'error'
      end
    end
  end
end

#unzip(file, target = "unzipped") ⇒ Object

file = the file you want to unzip; target = the relative directory you want to unzip to



57
58
59
60
61
62
63
64
65
# File 'lib/irs_pub78/client.rb', line 57

# Extracts every entry of a zip archive beneath a target directory.
#
# Parent directories are created as needed and entries whose destination file
# already exists are left untouched. Entries whose name would resolve to a
# location outside +target+ (for example "../x") are skipped with a warning
# rather than written.
#
# @param file [String] path of the archive to open
# @param target [String] directory to extract into (default "unzipped")
def unzip(file, target = "unzipped")
  root        = File.expand_path(target)
  root_prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR

  Zip::ZipFile.open(file) do |zip|
    # The block variable is named `entry` so it no longer shadows the `file`
    # parameter.
    zip.each do |entry|
      path     = File.join(target, entry.name)
      resolved = File.expand_path(path)

      # Guard against archive entries that climb out of the target directory.
      unless resolved == root || resolved.start_with?(root_prefix)
        warn "Skipping #{entry.name}: it would be extracted outside #{target}"
        next
      end

      FileUtils.mkdir_p(File.dirname(path))
      zip.extract(entry, path) unless File.exist?(path)
    end
  end
end