Class: Quandl::Format::Dataset::Load
- Inherits: Object
  - Object
  - Quandl::Format::Dataset::Load
- Defined in: lib/quandl/format/dataset/load.rb
Constant Summary collapse
- SYNTAX =
{ comment: '#', data: '-', attribute: /^([a-z0-9_]+): (.+)/, }
Class Method Summary collapse
- .each_in_file(path, &block) ⇒ Object
- .each_line(interface, &block) ⇒ Object
- .file(path) ⇒ Object
- .new_node(line = 0) ⇒ Object
- .parse_node(node) ⇒ Object
- .process_line(rline, node, &block) ⇒ Object
- .process_node(node, &block) ⇒ Object
- .process_tail(node, &block) ⇒ Object
- .string(input) ⇒ Object
Class Method Details
.each_in_file(path, &block) ⇒ Object
11 12 13 |
# File 'lib/quandl/format/dataset/load.rb', line 11
#
# Streams the file at +path+ through each_line, forwarding +block+ unchanged.
#
# The file is opened with the block form of File.open so the handle is closed
# as soon as each_line returns or raises, rather than staying open until the
# garbage collector finalizes it.
#
# @param path [String] path of the file to read
# @yield forwarded as-is to each_line
# @return [Object] whatever each_line returns
def each_in_file(path, &block)
  File.open(path, "r") { |io| each_line(io, &block) }
end
.each_line(interface, &block) ⇒ Object
15 16 17 18 19 20 21 22 23 |
# File 'lib/quandl/format/dataset/load.rb', line 15
#
# Feeds every line produced by +interface+ through process_line, threading the
# node returned by one call into the next, then flushes the final node via
# process_tail.
#
# @param interface [#each_line] line source (e.g. an open File)
# @yield forwarded to process_line and process_tail
# @return [Object] the result of process_tail
def each_line(interface, &block)
  current = new_node
  # process_line may hand back a fresh node, so always keep its return value
  interface.each_line { |raw| current = process_line(raw, current, &block) }
  process_tail(current, &block)
end
.file(path) ⇒ Object
25 26 27 |
# File 'lib/quandl/format/dataset/load.rb', line 25
#
# Reads the whole file at +path+ into memory and parses it with string.
#
# @param path [String] path of the file to load
# @return [Object] whatever string returns for the file's contents
def file(path)
  contents = File.read(path)
  string(contents)
end
.new_node(line = 0) ⇒ Object
45 46 47 |
# File 'lib/quandl/format/dataset/load.rb', line 45
#
# Builds an empty node hash positioned at +line+.
#
# The node starts in the :attributes section with empty attribute and data
# buffers. :offset is one less than +line+, except that a +line+ of 0 keeps an
# offset of 0.
#
# @param line [Integer] line number the node starts from
# @return [Hash] a fresh node
def new_node(line = 0)
  offset = 0
  offset = line - 1 unless line == 0
  {
    line: line,
    section: :attributes,
    data: '',
    attributes: '',
    data_line: 0,
    offset: offset
  }
end
.parse_node(node) ⇒ Object
112 113 114 115 116 117 118 119 120 |
# File 'lib/quandl/format/dataset/load.rb', line 112
#
# Replaces the node's raw text buffers with parsed values, in place.
#
# node[:attributes] is overwritten with the result of parse_yaml_attributes
# (defined outside this view). If that result is blank the node is rejected;
# otherwise node[:data] is converted with Quandl::Data::Format.csv_to_array.
#
# @param node [Hash] node whose :attributes and :data hold raw text
# @return [Hash, false] the mutated node, or false when no attributes parsed
def parse_node(node)
  attributes = parse_yaml_attributes(node)
  node[:attributes] = attributes
  # nothing more can be done without attributes
  return false if attributes.blank?

  raw_data = node[:data]
  node[:data] = Quandl::Data::Format.csv_to_array(raw_data)
  node
end
.process_line(rline, node, &block) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/quandl/format/dataset/load.rb', line 55
#
# Consumes one raw input line and folds it into +node+.
#
# * Comment and blank lines are skipped; while in the :attributes section they
#   still add a "\n" to the attribute buffer.
# * A line matching SYNTAX[:attribute] switches to the :attributes section. If
#   the node was in the :data section, it is first handed to process_node and
#   replaced by a new node that starts at the current line number.
# * A line starting with SYNTAX[:data] switches to the :data section and is
#   itself discarded.
# * Any other line is appended to the buffer of the current section.
#
# @param rline [String] the raw line, including any trailing newline
# @param node [Hash] the node being accumulated (mutated in place)
# @yield forwarded to process_node when a node is completed
# @return [Hash] the node to pass to the next call (may be a new node)
def process_line(rline, node, &block)
  node[:line] += 1
  stripped = rline.strip

  # comments and blank lines carry no content
  if stripped.blank? || stripped[0] == SYNTAX[:comment]
    node[:attributes] += "\n" if node[:section] == :attributes
    return node
  end

  if stripped =~ SYNTAX[:attribute]
    if node[:section] == :data
      # an attribute right after data means the previous node is complete
      process_node(node, &block)
      node = new_node(node[:line])
    end
    node[:section] = :attributes
  elsif stripped[0] == SYNTAX[:data]
    # the separator line itself is not kept; data starts on the next line
    node[:data_line] = node[:line] + 1
    node[:section] = :data
    return node
  end

  section = node[:section]
  addition =
    case section
    when :data
      # data rows are stored stripped, one per line
      "#{stripped}\n"
    when :attributes
      # attribute lines keep their raw form minus trailing commas
      rline.gsub(/,+$/, '')
    else
      rline
    end
  node[section] += addition
  node
end
.process_node(node, &block) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/quandl/format/dataset/load.rb', line 93
#
# Parses +node+, converts it to a dataset and reports the outcome to +block+.
#
# On success the block is called with (dataset, nil), unless the converted
# dataset is nil, in which case the block is not called. When a StandardError
# is raised while parsing, converting or yielding, the block is called with
# (nil, error) instead.
#
# Only StandardError is rescued: Interrupt, SystemExit, NoMemoryError and
# other non-standard exceptions propagate to the caller instead of being
# reported as a bad node.
#
# @param node [Hash] the accumulated node to process
# @yieldparam dataset [Object, nil] the converted dataset, nil on error
# @yieldparam error [StandardError, nil] the rescued error, nil on success
# @return [Boolean] true when the node was processed, false when parse_node
#   rejected it or an error was rescued
def process_node(node, &block)
  parsed = parse_node(node)
  # parse_node returns false for a node it cannot use
  return false if parsed == false

  dataset = convert_node_to_dataset(parsed)
  block.call(dataset, nil) unless dataset.nil?
  true
rescue StandardError => err
  block.call(nil, err)
  false
end
.process_tail(node, &block) ⇒ Object
49 50 51 52 53 |
# File 'lib/quandl/format/dataset/load.rb', line 49
#
# Flushes the last node once the input is exhausted by replaying two synthetic
# lines through process_line: a '-' separator, which puts the node in the data
# section, followed by a 'tail: end' attribute line, which makes process_line
# treat the node as finished.
#
# NOTE(review): the synthetic '-' line also overwrites node[:data_line] and
# both lines advance node[:line]; confirm nothing downstream relies on those
# values for the final node.
#
# @param node [Hash] the node left over after the last real input line
# @yield forwarded to process_line
# @return [Object] the result of the final process_line call
def process_tail(node, &block)
  ['-', 'tail: end'].map { |marker| process_line(marker, node, &block) }.last
end
.string(input) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/quandl/format/dataset/load.rb', line 29
#
# Parses +input+ and returns every value yielded while doing so.
#
# Delegates the line loop and the final flush to each_line instead of
# repeating them here, so both entry points always share one parsing path.
#
# NOTE(review): only the first block argument is collected. When a node fails,
# process_node calls the block with (nil, error), so a nil entry is appended
# and the error itself is dropped; confirm callers expect that.
#
# @param input [#each_line] the text to parse (e.g. a String)
# @return [Array] the collected datasets, in input order
def string(input)
  datasets = []
  each_line(input) { |dataset| datasets << dataset }
  datasets
end