Class: Ingestor::Proxy

Inherits:
Struct
  • Object
show all
Defined in:
lib/ingestor/proxy.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#file ⇒ Object

Returns the value of attribute file

Returns:

  • (Object)

    the current value of file



2
3
4
# File 'lib/ingestor/proxy.rb', line 2

# Reader for the +file+ attribute.
#
# @return [Object] the current value of file
def file
  @file
end

#options ⇒ Object

Returns the value of attribute options

Returns:

  • (Object)

    the current value of options



2
3
4
# File 'lib/ingestor/proxy.rb', line 2

# Reader for the +options+ attribute. Other methods of this class index it
# with symbol keys (e.g. +options[:finder]+, +options[:compressed]+).
#
# @return [Object] the current value of options
def options
  @options
end

Instance Method Details

#compressed? ⇒ Boolean

Returns:

  • (Boolean)


23
# File 'lib/ingestor/proxy.rb', line 23

# Reports whether the +:compressed+ option is set.
#
# @return [Object] the raw value stored under +options[:compressed]+;
#   callers use it for its truthiness
def compressed?
  options[:compressed]
end

#continue_from(line_num) ⇒ Object

For debugging and testing.



26
27
28
29
# File 'lib/ingestor/proxy.rb', line 26

# Debugging/testing helper: rewinds the document and returns the requested
# line wrapped in an array.
#
# @param line_num [Integer] 1-based number of the line to fetch
# @return [Array] a one-element array holding that line, or an empty array
#   when the document has fewer lines
def continue_from(line_num)
  @document.rewind
  skipped = line_num - 1
  @document.drop(skipped).first(1)
end

#default_processor(attrs, record) ⇒ Object



67
68
69
# File 'lib/ingestor/proxy.rb', line 67

# Fallback used by #process_record when no +:processor+ option is given:
# writes the parsed attributes onto the record.
#
# +update_attributes+ was removed from ActiveRecord in Rails 6.1, so the
# record's public +update+ is preferred when available; records that only
# expose the legacy +update_attributes+ keep working unchanged.
#
# @param attrs [Object] attributes handed over by the parser (passed through
#   untouched)
# @param record [Object] object answering +update+ or +update_attributes+
# @return [Object] whatever the record's update method returns
def default_processor(attrs, record)
  if record.respond_to?(:update)
    record.update(attrs)
  else
    record.update_attributes(attrs)
  end
end

#document ⇒ Object



3
4
5
# File 'lib/ingestor/proxy.rb', line 3

# Returns the handle stored in +@document+. Within this class it is assigned
# by #load, #load_remote and #load_compressed (a File or Tempfile).
#
# @return [Object] the current document handle, or nil before loading
def document
  @document
end

#finder ⇒ Object



31
32
33
# File 'lib/ingestor/proxy.rb', line 31

# Returns the callable configured under +options[:finder]+.
# #process_entry invokes it as +finder.call(attrs)+ to obtain the record.
#
# @return [Object, nil] the configured finder, or nil when none was given
def finder
  options[:finder]
end

#header ⇒ Object



7
8
9
# File 'lib/ingestor/proxy.rb', line 7

# Returns the value stored in +@header+. #start! sets it to the stripped
# first line of the document when +options[:includes_header]+ is truthy.
#
# @return [Object, nil] the header line, or nil when it was never read
def header
  @header
end

#load ⇒ Object



100
101
102
103
104
105
106
107
# File 'lib/ingestor/proxy.rb', line 100

# Prepares the document for parsing: makes sure the working directory
# exists, downloads the file when it is remote, decompresses it when it is
# flagged as compressed, and otherwise opens +file+ directly.
#
# Uses +Dir.exist?+; the +Dir.exists?+ alias is deprecated and no longer
# available on Ruby 3.2+.
#
# @return [Object] the document handle (also stored in +@document+)
def load
  Dir.mkdir(working_directory, 0777) unless Dir.exist?(working_directory)

  load_remote if remote?
  load_compressed if compressed?

  # Plain local file: neither helper above has assigned @document.
  @document ||= File.new(file)
end

#load_compressed ⇒ Object

When loading compressed files, the assumption is that if the archive contains more than one file, those files are chunks of a single document: they are concatenated and treated as one large file.



85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/ingestor/proxy.rb', line 85

# Expands a zip archive into a single temp file inside the working
# directory. Every entry is appended in archive order, so a multi-entry
# archive is treated as one chunked document.
#
# The archive is taken from +@document+ when #load_remote has already
# downloaded it; for a local file nothing has been opened yet, so +file+ is
# opened here instead of failing on +nil.path+.
#
# Side effects: +@tempfile+ keeps the archive handle and +@document+ is
# replaced by the rewound, decompressed temp file.
#
# @return [Object] result of rewinding the decompressed document
def load_compressed
  Ingestor::LOG.debug("Compressed file detected #{file}...")
  @tempfile = @document || File.new(file, 'rb')
  @document = Tempfile.new("decompressed", working_directory)
  @document.binmode

  # rubyzip >= 1.0 renamed Zip::ZipFile to Zip::File; use whichever exists.
  zip_reader = defined?(Zip::File) ? Zip::File : Zip::ZipFile
  zip_reader.open(@tempfile.path) do |zipfile|
    zipfile.each do |entry|
      @document.write entry.get_input_stream.read
    end
  end
  @document.rewind
end

#load_remote ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ingestor/proxy.rb', line 71

# Downloads the remote +file+ into a temp file inside the working directory
# and leaves that temp file, rewound, in +@document+.
#
# Uses +URI.open+ (from open-uri, Ruby 2.5+) instead of the bare
# +Kernel#open+: since Ruby 3.0 +Kernel#open+ no longer handles URLs, and it
# would spawn a subprocess for input starting with a pipe character.
#
# @return [Object] result of rewinding the downloaded document
def load_remote
  Ingestor::LOG.debug("Remote file detected #{file}...")
  @document = Tempfile.new("local", working_directory)
  # Archives must be written byte-for-byte.
  @document.binmode if compressed?

  URI.open( file, 'rb' ) do |remote_file|
    Ingestor::LOG.debug("Downloading #{file}...")
    @document.write remote_file.read
    @document.rewind
  end
end

#local? ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/ingestor/proxy.rb', line 15

# Logical negation of #remote?.
#
# @return [Boolean] true when #remote? is falsy, false otherwise
def local?
  !remote?
end

#process_entry(attrs) ⇒ Object

To be called from parsers: send attributes, get a record.



53
54
55
56
57
58
59
60
61
# File 'lib/ingestor/proxy.rb', line 53

# Entry point for parsers: runs one set of parsed attributes through the
# configured pipeline and hands back the record.
#
# Order of operations: the +:before+ callback (if any) receives +attrs+, the
# finder (if any) resolves the record, #process_record applies the
# attributes, and the +:after+ callback (if any) receives the record.
#
# @param attrs [Object] attributes produced by the parser
# @return [Object, nil] the record returned by the finder, or nil when no
#   finder is configured
def process_entry(attrs)
  before_hook = options[:before]
  before_hook.call(attrs) if before_hook

  lookup = finder
  record = nil
  record = lookup.call(attrs) if lookup

  process_record(attrs, record)

  after_hook = options[:after]
  after_hook.call(record) if after_hook
  record
end

#process_record(attrs, record) ⇒ Object



63
64
65
# File 'lib/ingestor/proxy.rb', line 63

# Applies +attrs+ to +record+ using the callable in +options[:processor]+
# when one is configured, and #default_processor otherwise.
#
# @param attrs [Object] attributes produced by the parser
# @param record [Object, nil] record resolved by the finder
# @return [Object] whatever the chosen processor returns
def process_record(attrs, record)
  custom = options[:processor]
  return default_processor(attrs, record) unless custom

  custom.call(attrs, record)
end

#remote? ⇒ Boolean

Returns:

  • (Boolean)


11
12
13
# File 'lib/ingestor/proxy.rb', line 11

# Tells whether +file+ is a URL that has to be downloaded.
#
# Only a leading +http://+, +https://+ or +ftp://+ scheme (any letter case)
# counts. The pattern is anchored so that local paths which merely contain
# "http" or "ftp" (e.g. +/home/ftp/data.csv+) are not mistaken for URLs.
#
# @return [Boolean] true for http(s)/ftp URLs, false otherwise
def remote?
  # to_s keeps non-String values from raising on =~.
  !!(file.to_s =~ %r{\A(?:https?|ftp)://}i)
end

#start! ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/ingestor/proxy.rb', line 35

# Runs the ingestion: loads the document, optionally consumes its header
# line, builds the parser named by +options[:parser]+ and lets it either
# process or sample the document.
#
# A warning is logged when no finder is configured. When
# +options[:includes_header]+ is truthy the first line is read, stripped and
# kept in +@header+ before the parser sees the document.
#
# @return [self]
def start!
  load
  Ingestor::LOG.warn("No #finder specified") unless finder
  @header = @document.gets.strip if options[:includes_header]

  parser_class = Ingestor.parser_for(options[:parser])
  parser = parser_class.new(self, @document)
  parser.options(options[:parser_options])

  if options[:sample]
    parser.sample!
  else
    parser.process!
  end

  self
end

#working_directory ⇒ Object



19
20
21
# File 'lib/ingestor/proxy.rb', line 19

# Returns the directory configured under +options[:working_directory]+.
# #load creates it when missing, and the temp files made by #load_remote and
# #load_compressed are placed inside it.
#
# @return [Object, nil] the configured working directory
def working_directory
  options[:working_directory]
end