Class: HeyDan::Helper

Inherits: Object

Inheritance chain: Object → HeyDan::Helper

Defined in: lib/heydan/helper.rb

Class Method Summary

Class Method Details

.classify(name) ⇒ `Object`



11
12
13

# File 'lib/heydan/helper.rb', line 11

# Converts an underscore-separated name into a CamelCase string.
#
# Every segment between underscores goes through String#capitalize (first
# character upcased, remaining characters downcased) and the segments are
# concatenated without a separator.
#
# @param name [String] underscore-separated name, e.g. "census_data"
# @return [String] the joined, capitalized segments, e.g. "CensusData"
def classify(name)
  segments = name.split('_')
  segments.map { |segment| segment.capitalize }.join
end

.dataset_exists?(name) ⇒ `Boolean`

Returns:

(Boolean)



39
40
41

# File 'lib/heydan/helper.rb', line 39

# Tells whether a CSV file for the named dataset is present in the datasets
# folder (HeyDan.folders[:datasets]).
#
# Every ".csv" occurrence in +name+ is removed before ".csv" is appended, so
# the name may be given with or without the extension.
#
# @param name [String] dataset name, with or without ".csv"
# @return [Boolean] result of File.exist? on the computed path
def dataset_exists?(name)
  basename = name.gsub('.csv', '')
  dataset_path = File.join(HeyDan.folders[:datasets], "#{basename}.csv")
  File.exist?(dataset_path)
end

.download(url) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 15

# Fetches +url+ into the downloads folder (HeyDan.folders[:downloads]) unless
# a file for that URL is already there, and returns the local path.
#
# The local file name is +md5_name(url)+, so the same URL always maps to the
# same file and an existing file is reused instead of being fetched again.
#
# @param url [String] address handed to +download_file+
# @return [String] path of the (possibly pre-existing) local file
def download(url)
  path = HeyDan.folders[:downloads]
  # mkdir_p succeeds silently when the directory exists, so no existence
  # check is needed (Dir.exists? is gone as of Ruby 3.2).
  FileUtils.mkdir_p(path)
  new_file = File.join(path, md5_name(url))
  download_file(url, new_file) unless File.exist?(new_file)
  new_file
end

.download_file(url, file_path) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 139

# Copies the resource at +url+ to +file_path+ and returns the expanded
# destination path.
#
# The source is opened with URI.open (open-uri, Ruby 2.5+) because bare
# Kernel#open stopped handling URLs in Ruby 3.0. The block form closes the
# remote handle once the copy finishes.
#
# NOTE(review): URI.open falls back to Kernel#open for strings that are not
# URLs, so local paths still work, but so does the "|command" pipe form.
# Do not pass untrusted strings.
#
# @param url [String] URL (or local path) to read from
# @param file_path [String] destination; expanded with File.expand_path
# @return [String] absolute path of the written file
def download_file(url, file_path)
  require 'open-uri'
  full_path = File.expand_path(file_path)
  URI.open(url) do |remote|
    File.open(full_path, 'wb') do |saved_file|
      # Stream the body instead of loading it fully into memory.
      IO.copy_stream(remote, saved_file)
    end
  end
  full_path
end

.get_csv_data(file) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 124

# Reads a delimited text file and parses it into an array of rows.
#
# The file is read as UTF-8, invalid byte sequences are dropped, and every
# double-quote character is removed before parsing. If the content contains
# a tab anywhere it is parsed as tab-separated; otherwise as comma-separated.
#
# @param file [String] path of the file to read
# @return [Array<Array<String, nil>>] parsed rows
def get_csv_data(file)
  contents = File.read(file, encoding: 'utf-8')
                 .encode('UTF-8', invalid: :replace, replace: '')
                 .delete('"')

  col_sep = contents.include?("\t") ? "\t" : ','
  # Options must be keywords: a positional hash raises ArgumentError on Ruby 3.
  CSV.parse(contents, col_sep: col_sep)
end

.get_data(name) ⇒ `Object`



35
36
37

# File 'lib/heydan/helper.rb', line 35

# Loads a saved dataset from the datasets folder (HeyDan.folders[:datasets])
# with CSV.read.
#
# Every ".csv" occurrence in +name+ is removed before ".csv" is appended, so
# the name may be given with or without the extension.
#
# @param name [String] dataset name, with or without ".csv"
# @return [Array<Array>] rows returned by CSV.read
def get_data(name)
  file_name = "#{name.gsub('.csv', '')}.csv"
  CSV.read(File.join(HeyDan.folders[:datasets], file_name))
end

.get_data_from_url(url) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 43

# Downloads +url+ (through +download+) and parses the file according to the
# extension found in the URL path.
#
# Dispatch by extension:
# * csv        -> get_csv_data
# * xls / xlsx -> get_excel_data with the matching type
# * zip        -> the archive is unzipped, then:
#   - if it contains a shapefile, get_shapefile_data
#   - if it holds exactly one file, that file is parsed as CSV or as Excel;
#     the Excel type now comes from the extracted file's extension, so an
#     .xlsx inside a zip is no longer read with the .xls parser
#   - otherwise every entry that passes is_csv? is parsed; other entries
#     yield nil in the returned array
# * shp        -> get_shapefile_data
# * anything else (including txt) -> get_csv_data when is_csv? accepts it
#
# The result is also stored in @data, except on the early +return+ paths of
# the zip branch, which return without assigning it.
#
# @param url [String] address of the data file
# @return [Array, nil] parsed rows, or nil when nothing could be parsed
def get_data_from_url(url)
  ext = get_file_type_from_url(url)
  file = download(url)
  @data = case ext
          when 'csv'
            get_csv_data(file)
          when 'xls'
            get_excel_data(file)
          when 'xlsx'
            get_excel_data(file, 'xlsx')
          when 'zip'
            files = unzip(file)
            return get_shapefile_data(files) if is_shapefile?(files)
            if files.size == 1
              only = files[0]
              return get_csv_data(only) if is_csv?(only)
              if is_excel?(files)
                # Choose the parser from the extracted file's own extension.
                type = File.extname(only.to_s).downcase == '.xlsx' ? 'xlsx' : 'xls'
                return get_excel_data(only, type)
              end
            else
              files.map { |f| get_csv_data(f) if is_csv?(f) }
            end
          when 'shp'
            # NOTE(review): +file+ is a single path here, while the zip branch
            # passes an Array to get_shapefile_data. Confirm this branch works.
            get_shapefile_data(file)
          else
            get_csv_data(file) if is_csv?(file)
          end
  @data
end

.get_excel_data(file, type = 'xls') ⇒ `Object`

# File 'lib/heydan/helper.rb', line 106

# Reads every worksheet of an Excel workbook into nested arrays.
#
# With type 'xls' the workbook is opened with the spreadsheet gem and each
# sheet contributes its +rows+. With any other type it is parsed with rubyXL
# and each sheet contributes one array of cell values per row (nil for a
# missing row or a missing cell). Both gems are required lazily.
#
# @param file [String] path of the workbook
# @param type [String] 'xls' selects spreadsheet; anything else selects rubyXL
# @return [Array] the rows of the only sheet when the workbook has exactly one
#   sheet, otherwise an array with one entry per sheet
def get_excel_data(file, type='xls')
  sheets =
    if type == 'xls'
      require 'spreadsheet'
      Spreadsheet.open(file).worksheets.map { |sheet| sheet.rows }
    else
      require 'rubyXL'
      RubyXL::Parser.parse(file).worksheets.map do |sheet|
        sheet.sheet_data.rows.map do |row|
          next if row.nil?
          row.cells.map { |cell| cell&.value }
        end
      end
    end
  sheets.size == 1 ? sheets[0] : sheets
end

.get_excel_file(files) ⇒ `Object`



98
99
100

# File 'lib/heydan/helper.rb', line 98

# Picks the first entry whose string form contains ".xls" or ".xlsx".
#
# The check is a substring match, not an extension match.
#
# @param files [Array] candidate paths (each is converted with to_s)
# @return [Object, nil] the first matching entry, or nil when none matches
def get_excel_file(files)
  files.find do |candidate|
    path = candidate.to_s
    %w[.xls .xlsx].any? { |marker| path.include?(marker) }
  end
end

.get_file_type_from_url(url) ⇒ `Object`



148
149
150

# File 'lib/heydan/helper.rb', line 148

# Extracts the file extension, without the dot, from the path part of a URL.
#
# The query string and fragment are ignored because only URI#path is used.
#
# @param url [String] URL to inspect
# @return [String] extension such as "csv", or "" when the path has none
def get_file_type_from_url(url)
  url_path = URI.parse(url).path
  File.extname(url_path).delete('.')
end

.get_shapefile(shapefile_array) ⇒ `Object`



81
82
83

# File 'lib/heydan/helper.rb', line 81

# Picks the first entry whose string form contains ".shp".
#
# The check is a substring match, so names like "x.shp.xml" also qualify.
#
# @param shapefile_array [Array] candidate paths (each is converted with to_s)
# @return [Object, nil] the first matching entry, or nil when none matches
def get_shapefile(shapefile_array)
  shapefile_array.find { |candidate| candidate.to_s.include?('.shp') }
end

.get_shapefile_data(shapefile_array) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 85

# Reads a shapefile into a table: a header row followed by one row per record.
#
# The .shp path is chosen with +get_shapefile+ and opened with
# GeoRuby::Shp4r::ShpFile (geo_ruby is required lazily). The header is the
# field names plus "geojson"; each data row is the record's attribute values
# followed by the value of +geometry.as_json+.
#
# @param shapefile_array [Array] paths, one of which is the .shp file
# @return [Array<Array>] header row followed by the data rows
def get_shapefile_data(shapefile_array)
  shp_path = get_shapefile(shapefile_array)
  require 'geo_ruby'
  require 'geo_ruby/shp'

  shapefile = GeoRuby::Shp4r::ShpFile.open(shp_path)
  header = shapefile.fields.map { |field| field.name } + ['geojson']
  rows = [header]
  shapefile.records.each do |record|
    attribute_values = record.data.attributes.values
    rows.push(attribute_values + [record.geometry.as_json])
  end
  rows
end

.is_csv?(file_path) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/heydan/helper.rb', line 72

# Heuristically decides whether a file looks like delimited text by checking
# its first line for a tab or a comma that directly follows a word character.
#
# The previous condition (`tab_match.nil? || comma_match.nil?`) was true
# whenever either delimiter was absent, so almost every file passed. It now
# returns true only when a delimiter is actually found. An empty file yields
# false instead of raising EOFError, and invalid bytes are scrubbed so binary
# files do not raise during the regexp match. The heuristic is not perfect.
#
# @param file_path [String] path of the file to inspect
# @return [Boolean] true when the first line contains a tab or comma delimiter
def is_csv?(file_path)
  # gets returns nil at EOF, whereas readline raises EOFError.
  first_line = File.open(file_path, &:gets)
  return false if first_line.nil?

  first_line.scrub.match?(/\b[\t,]/)
end

.is_excel?(files) ⇒ `Boolean`

Returns:

(Boolean)



102
103
104

# File 'lib/heydan/helper.rb', line 102

# Tells whether +get_excel_file+ finds an Excel file among +files+.
#
# @param files [Array] candidate paths
# @return [Boolean] false when get_excel_file returns nil, true otherwise
def is_excel?(files)
  workbook = get_excel_file(files)
  workbook.nil? ? false : true
end

.is_shapefile?(shapefile_array) ⇒ `Boolean`

Returns:

(Boolean)



77
78
79

# File 'lib/heydan/helper.rb', line 77

# Tells whether +get_shapefile+ finds a shapefile among the given paths.
#
# @param shapefile_array [Array] candidate paths
# @return [Boolean] false when get_shapefile returns nil, true otherwise
def is_shapefile?(shapefile_array)
  shp_path = get_shapefile(shapefile_array)
  shp_path.nil? ? false : true
end

.md5_name(text) ⇒ `Object`



135
136
137

# File 'lib/heydan/helper.rb', line 135

# Returns the hexadecimal MD5 digest of +text+.
#
# @param text [String] input to hash
# @return [String] 32-character lowercase hex digest
def md5_name(text)
  digest = Digest::MD5.new
  digest << text
  digest.hexdigest
end

.save_data(name, data) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 24

# Writes +data+ as a CSV file in the datasets folder
# (HeyDan.folders[:datasets]), creating the folder if needed.
#
# Every ".csv" occurrence in +name+ is removed before ".csv" is appended, so
# the name may be given with or without the extension. An existing file is
# overwritten because it is opened in 'w' mode.
#
# @param name [String] dataset name, with or without ".csv"
# @param data [Enumerable<Array>] rows to write, one array per CSV line
# @return [Object] whatever CSV.open returns for the block
def save_data(name, data)
  path = HeyDan.folders[:datasets]
  # mkdir_p succeeds silently when the directory exists, so no existence
  # check is needed (Dir.exists? is gone as of Ruby 3.2).
  FileUtils.mkdir_p(path)
  full_path = File.expand_path(File.join(path, "#{name.gsub('.csv', '')}.csv"))
  CSV.open(full_path, 'w') do |csv|
    data.each { |row| csv << row }
  end
end

.unzip(file) ⇒ `Object`

# File 'lib/heydan/helper.rb', line 152

# Extracts every entry of a zip archive into the downloads folder
# (HeyDan.folders[:downloads]) and returns the extracted paths.
#
# An entry whose target file already exists is not extracted again, but its
# path is still included in the result. An entry whose name would resolve
# outside the downloads folder (for example "../x") is skipped with a warning
# and left out of the result. rubyzip ('zip') is required lazily.
#
# @param file [String] path of the zip archive
# @return [Array<String>] absolute paths of the archive's entries on disk
def unzip(file)
  require 'zip'
  root = File.expand_path(HeyDan.folders[:downloads])
  root_prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR
  files = []
  Zip::File.open(file) do |zip_file|
    zip_file.each do |entry|
      download_path = File.expand_path(File.join(root, entry.name))
      # Guard against "zip slip": never write outside the downloads folder.
      unless download_path.start_with?(root_prefix)
        warn "Skipping zip entry outside the downloads folder: #{entry.name}"
        next
      end
      # File.exists? is gone as of Ruby 3.2; File.exist? is the supported form.
      entry.extract(download_path) unless File.exist?(download_path)
      files << download_path
    end
  end
  files
end

Class: HeyDan::Helper

Class Method Summary

Class Method Details

.classify(name) ⇒ Object

.dataset_exists?(name) ⇒ Boolean

.download(url) ⇒ Object

.download_file(url, file_path) ⇒ Object

.get_csv_data(file) ⇒ Object

.get_data(name) ⇒ Object

.get_data_from_url(url) ⇒ Object

.get_excel_data(file, type = 'xls') ⇒ Object

.get_excel_file(files) ⇒ Object

.get_file_type_from_url(url) ⇒ Object

.get_shapefile(shapefile_array) ⇒ Object

.get_shapefile_data(shapefile_array) ⇒ Object

.is_csv?(file_path) ⇒ Boolean

.is_excel?(files) ⇒ Boolean

.is_shapefile?(shapefile_array) ⇒ Boolean

.md5_name(text) ⇒ Object

.save_data(name, data) ⇒ Object

.unzip(file) ⇒ Object

.classify(name) ⇒ `Object`

.dataset_exists?(name) ⇒ `Boolean`

.download(url) ⇒ `Object`

.download_file(url, file_path) ⇒ `Object`

.get_csv_data(file) ⇒ `Object`

.get_data(name) ⇒ `Object`

.get_data_from_url(url) ⇒ `Object`

.get_excel_data(file, type = 'xls') ⇒ `Object`

.get_excel_file(files) ⇒ `Object`

.get_file_type_from_url(url) ⇒ `Object`

.get_shapefile(shapefile_array) ⇒ `Object`

.get_shapefile_data(shapefile_array) ⇒ `Object`

.is_csv?(file_path) ⇒ `Boolean`

.is_excel?(files) ⇒ `Boolean`

.is_shapefile?(shapefile_array) ⇒ `Boolean`

.md5_name(text) ⇒ `Object`

.save_data(name, data) ⇒ `Object`

.unzip(file) ⇒ `Object`