Class: Datasets::Rdataset

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/rdataset.rb

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize(package_name, dataset_name) ⇒ Rdataset

Returns a new instance of Rdataset.



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/datasets/rdataset.rb', line 62

# Creates a dataset backed by one CSV entry of the Rdatasets catalogue.
#
# The entry is looked up through RdatasetList by package and dataset name;
# its CSV URL and title are copied into the metadata.
#
# @param package_name [String] package the dataset belongs to
# @param dataset_name [String] dataset name inside that package
# @raise [ArgumentError] if the catalogue has no matching entry
def initialize(package_name, dataset_name)
  catalogue = RdatasetList.new
  matches = catalogue.filter(package: package_name, dataset: dataset_name)
  info = matches.first
  raise ArgumentError, "Unable to locate dataset #{package_name}/#{dataset_name}" unless info

  # The parent constructor runs only after the lookup has succeeded.
  super()

  @metadata.id = "rdataset-#{package_name}-#{dataset_name}"
  @metadata.name = "Rdataset: #{package_name}: #{dataset_name}"
  @metadata.url = info.csv
  @metadata.licenses = ["GPL-3"]
  @metadata.description = info.title

  # The downloaded CSV is cached as "<dataset_name>.csv" under cache_dir_path.
  csv_file_name = dataset_name + ".csv"
  @data_path = cache_dir_path + csv_file_name

  @package_name = package_name
  @dataset_name = dataset_name
end

Instance Method Details

#each(&block) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/datasets/rdataset.rb', line 84

# Downloads the dataset CSV (via #download) and yields every row as a Hash
# keyed by column-name symbols.
#
# Field conversion, applied in this order:
# * "NA" becomes nil and "Inf" becomes Float::INFINITY;
# * fields that were quoted in the CSV are kept as strings;
# * unquoted fields are tried as DateTime (when they match
#   CSV::DateTimeMatcher), then Integer, then Float; anything that cannot be
#   converted is returned unchanged.
#
# The column whose header is the empty string (the row-index column) is
# removed before rows are yielded.
#
# @yieldparam row [Hash{Symbol => Object}] one converted CSV row
# @return [Enumerator] when no block is given
def each(&block)
  return to_enum(__method__) unless block_given?

  download(@data_path, @metadata.url)

  # Builds a one-argument converter that swaps an exact literal for a value.
  # Any error (e.g. an encoding failure) leaves the field untouched.
  literal_converter = lambda do |literal, replacement|
    lambda do |field|
      begin
        field.encode(CSV::ConverterEncoding) == literal ? replacement : field
      rescue
        field
      end
    end
  end

  na_converter = literal_converter.call("NA", nil)
  inf_converter = literal_converter.call("Inf", Float::INFINITY)

  quote_preserving_converter = lambda do |field, info|
    begin
      f = field.encode(CSV::ConverterEncoding)
    rescue
      # Best effort, like the converters above: keep the raw field.
      return field
    end
    return f if info.quoted?

    # The matcher constant lives in CSV; it must be qualified to resolve here.
    if f.match?(CSV::DateTimeMatcher)
      begin
        return DateTime.parse(f)
      rescue
        # Matched the pattern but is not a valid date: try numbers instead.
      end
    end

    begin
      Integer(f)
    rescue
      begin
        Float(f)
      rescue
        field
      end
    end
  end

  table = CSV.table(@data_path,
                    header_converters: [:symbol_raw],
                    # quote_preserving_converter should be the last
                    converters: [na_converter, inf_converter, quote_preserving_converter])
  table.delete(:"") # delete 1st column for indices.

  table.each { |row| yield row.to_h }
end