Class: Quandl::Format::Dataset::Load

Inherits:
Object
  • Object
show all
Defined in:
lib/quandl/format/dataset/load.rb

Constant Summary collapse

# Markers that process_line uses to classify each stripped input line.
SYNTAX =
{
  # a line whose first character is '#' is skipped as a comment
  comment:          '#',
  # a line whose first character is '-' switches the current node to its data section
  data:             '-',
  # "key: value": key made of lowercase letters, digits or underscores, then ": " and a non-empty value
  attribute:        /^([a-z0-9_]+): (.+)/,
}

Class Method Summary collapse

Class Method Details

.each_in_file(path, &block) ⇒ Object



11
12
13
# File 'lib/quandl/format/dataset/load.rb', line 11

# Streams the file at +path+ line by line through each_line.
#
# The file is opened with the block form of File.open so the handle is
# closed as soon as processing finishes, even if each_line raises.
#
# @param path [String] path of the file to read
# @param block forwarded unchanged to each_line
# @return whatever each_line returns
def each_in_file(path, &block)
  File.open(path, "r") { |handle| each_line(handle, &block) }
end

.each_line(interface, &block) ⇒ Object



15
16
17
18
19
20
21
22
23
# File 'lib/quandl/format/dataset/load.rb', line 15

# Feeds every line of +interface+ through process_line, then flushes the
# last node with process_tail.
#
# @param interface any object responding to each_line with a block
# @param block forwarded to process_line and process_tail
# @return the result of process_tail
def each_line(interface, &block)
  current = new_node
  # process_line hands back the node to use for the following line
  interface.each_line { |text| current = process_line(text, current, &block) }
  process_tail(current, &block)
end

.file(path) ⇒ Object



25
26
27
# File 'lib/quandl/format/dataset/load.rb', line 25

# Reads the whole file at +path+ into memory and parses it with string.
#
# @param path [String] path of the file to read
# @return whatever string returns for the file's contents
def file(path)
  contents = File.read(path)
  string(contents)
end

.new_node(line = 0) ⇒ Object



45
46
47
# File 'lib/quandl/format/dataset/load.rb', line 45

# Builds an empty node hash that starts in the :attributes section.
#
# @param line line counter the node starts at
# @return [Hash] node with empty :data and :attributes strings, :data_line 0
#   and :offset set to 0 when line is 0, otherwise line - 1
def new_node(line=0)
  node = { line: line, section: :attributes }
  node[:data] = ''
  node[:attributes] = ''
  node[:data_line] = 0
  node[:offset] =
    if line == 0
      0
    else
      line - 1
    end
  node
end

.parse_node(node) ⇒ Object



112
113
114
115
116
117
118
119
120
# File 'lib/quandl/format/dataset/load.rb', line 112

# Replaces the node's raw text with parsed values.
#
# :attributes is overwritten with the result of parse_yaml_attributes; when
# that result is blank the node is unusable and false is returned before the
# data is touched. Otherwise :data is overwritten with the result of
# Quandl::Data::Format.csv_to_array.
#
# @param node [Hash] node to parse; modified in place
# @return [Hash, false] the same node, or false when attributes are blank
def parse_node(node)
  parsed_attributes = parse_yaml_attributes(node)
  node[:attributes] = parsed_attributes
  # without attributes there is nothing to build on
  return false if parsed_attributes.blank?

  raw_data = node[:data]
  node[:data] = Quandl::Data::Format.csv_to_array(raw_data)
  node
end

.process_line(rline, node, &block) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/quandl/format/dataset/load.rb', line 55

# Consumes one raw input line and folds it into the current node.
#
# Lines are classified after stripping surrounding whitespace:
# * comment or blank: skipped (a newline is still appended to the attributes
#   text while in the :attributes section)
# * attribute ("key: value"): appended to the attributes text; if the node
#   was in its :data section the node is finished first via process_node and
#   a new node is started at the current line number
# * data marker: switches the node to its :data section
# * anything else: appended to the text of the node's current section
#
# @param rline [String] the raw line, including any line terminator
# @param node [Hash] the node being built; modified in place
# @param block forwarded to process_node when a node is completed
# @return [Hash] the node to use for the next line
def process_line(rline, node, &block)
  node[:line] += 1
  stripped = rline.strip
  lead = stripped[0]

  # comments and blank lines carry no content
  if stripped.blank? || lead == SYNTAX[:comment]
    node[:attributes] += "\n" if node[:section] == :attributes
    return node
  end

  if stripped =~ SYNTAX[:attribute]
    # an attribute right after data means the previous node is complete
    if node[:section] == :data
      process_node(node, &block)
      # the new node carries on from the current line number
      node = new_node(node[:line])
    end
    node[:section] = :attributes
  elsif lead == SYNTAX[:data]
    # the data itself starts on the line after the marker
    node[:data_line] = node[:line] + 1
    node[:section] = :data
    return node
  end

  section = node[:section]
  addition =
    case section
    when :data
      # data rows are stored stripped, one per line
      "#{stripped}\n"
    when :attributes
      # attribute lines keep their raw form minus trailing commas
      rline.gsub(/,+$/,'')
    else
      rline
    end
  node[section] += addition
  node
end

.process_node(node, &block) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/quandl/format/dataset/load.rb', line 93

# Parses a completed node, converts it to a dataset and reports the outcome
# to the block.
#
# The block is called with (dataset, nil) when a dataset was produced, or
# with (nil, error) when parsing, conversion or the block itself raised.
# Only StandardError is reported this way: process-level exceptions such as
# Interrupt or SystemExit propagate to the caller instead of being swallowed.
#
# @param node [Hash] the raw node to process
# @param block receives (dataset, error)
# @return [Boolean] true on success; false when parse_node rejected the node
#   or an error was rescued
def process_node(node, &block)
  parsed = parse_node(node)
  # parse_node answers false for a node without attributes
  return false if parsed == false

  dataset = convert_node_to_dataset(parsed)
  block.call(dataset, nil) unless dataset.nil?
  true
rescue StandardError => err
  block.call(nil, err)
  false
end

.process_tail(node, &block) ⇒ Object



49
50
51
52
53
# File 'lib/quandl/format/dataset/load.rb', line 49

# Flushes the final node by feeding two synthetic lines to process_line:
# a data marker followed by an attribute line.
#
# @param node [Hash] the node left over after the last real line
# @param block forwarded to process_line
# @return the result of the second process_line call
def process_tail(node, &block)
  sentinels = ['-', 'tail: end']
  sentinels.map { |marker| process_line(marker, node, &block) }.last
end

.string(input) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/quandl/format/dataset/load.rb', line 29

# Parses +input+ line by line and returns everything yielded while doing so.
#
# The collecting block keeps only its first argument. process_node reports a
# failure as (nil, error), so a failed node shows up as a nil entry and its
# error is dropped.
#
# @param input any object responding to each_line with a block
# @return [Array] the collected datasets, in the order they were yielded
def string(input)
  collected = []
  # a proc (not a lambda) so the extra error argument is silently ignored
  collect = proc { |dataset| collected << dataset }

  current = new_node
  input.each_line { |text| current = process_line(text, current, &collect) }
  # flush whatever node is still open
  process_tail(current, &collect)
  collected
end