Class: Ingestor::Proxy
- Inherits:
-
Struct
- Object
- Struct
- Ingestor::Proxy
- Defined in:
- lib/ingestor/proxy.rb
Instance Attribute Summary collapse
-
#file ⇒ Object
Returns the value of attribute file.
-
#options ⇒ Object
Returns the value of attribute options.
Instance Method Summary collapse
- #compressed? ⇒ Boolean
-
#continue_from(line_num) ⇒ Object
for debugging, testing.
- #default_processor(attrs, record) ⇒ Object
- #document ⇒ Object
- #finder ⇒ Object
- #header ⇒ Object
- #load ⇒ Object
-
#load_compressed ⇒ Object
When loading compressed files, the assumption is that if the archive contains more than one file, the files are chunks of a single document: they are concatenated and treated as one large file.
- #load_remote ⇒ Object
- #local? ⇒ Boolean
-
#process_entry(attrs) ⇒ Object
To be called from Parsers: send attributes, get a record.
- #process_record(attrs, record) ⇒ Object
- #remote? ⇒ Boolean
- #start! ⇒ Object
- #working_directory ⇒ Object
Instance Attribute Details
#file ⇒ Object
Returns the value of attribute file
2 3 4 |
# File 'lib/ingestor/proxy.rb', line 2 def file @file end |
#options ⇒ Object
Returns the value of attribute options
2 3 4 |
# File 'lib/ingestor/proxy.rb', line 2 def options @options end |
Instance Method Details
#compressed? ⇒ Boolean
23 |
# File 'lib/ingestor/proxy.rb', line 23 def compressed?; options[:compressed]; end |
#continue_from(line_num) ⇒ Object
for debugging, testing
26 27 28 29 |
# File 'lib/ingestor/proxy.rb', line 26 def continue_from(line_num) @document.rewind @document.drop( line_num -1 ).take(1) end |
#default_processor(attrs, record) ⇒ Object
67 68 69 |
# File 'lib/ingestor/proxy.rb', line 67 def default_processor(attrs,record) record.update_attributes( attrs ) end |
#document ⇒ Object
3 4 5 |
# File 'lib/ingestor/proxy.rb', line 3 def document @document end |
#finder ⇒ Object
31 32 33 |
# File 'lib/ingestor/proxy.rb', line 31 def finder options[:finder] end |
#header ⇒ Object
7 8 9 |
# File 'lib/ingestor/proxy.rb', line 7 def header @header end |
#load ⇒ Object
100 101 102 103 104 105 106 107 |
# File 'lib/ingestor/proxy.rb', line 100 def load Dir.mkdir(working_directory, 0777) unless Dir.exists?(working_directory) load_remote if remote? load_compressed if compressed? @document ||= File.new( file ) end |
#load_compressed ⇒ Object
When loading compressed files, the assumption is that if the archive contains more than one file, the files are chunks of a single document: they are concatenated and treated as one large file
85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/ingestor/proxy.rb', line 85 def load_compressed Ingestor::LOG.debug("Compressed file detected #{file}...") @tempfile = @document @document = Tempfile.new("decompressed", working_directory) @document.binmode Zip::ZipFile.open(@tempfile.path) do |zipfile| zipfile.each do |entry| istream = entry.get_input_stream @document.write istream.read end end @document.rewind end |
#load_remote ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/ingestor/proxy.rb', line 71 def load_remote Ingestor::LOG.debug("Remote file detected #{file}...") @document = Tempfile.new("local", working_directory) @document.binmode if compressed? open( file, 'rb' ) do |remote_file| Ingestor::LOG.debug("Downloading #{file}...") @document.write remote_file.read @document.rewind end end |
#local? ⇒ Boolean
15 16 17 |
# File 'lib/ingestor/proxy.rb', line 15 def local? !remote? end |
#process_entry(attrs) ⇒ Object
To be called from Parsers: send attributes, get a record
53 54 55 56 57 58 59 60 61 |
# File 'lib/ingestor/proxy.rb', line 53 def process_entry( attrs ) options[:before].call(attrs) if options[:before] record = finder ? finder.call(attrs) : nil process_record(attrs,record) options[:after].call(record) if options[:after] record end |
#process_record(attrs, record) ⇒ Object
63 64 65 |
# File 'lib/ingestor/proxy.rb', line 63 def process_record(attrs,record) options[:processor] ? options[:processor].call(attrs, record) : default_processor(attrs, record) end |
#remote? ⇒ Boolean
11 12 13 |
# File 'lib/ingestor/proxy.rb', line 11 def remote? file =~ /http(s)?|ftp/i end |
#start! ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/ingestor/proxy.rb', line 35 def start! load Ingestor::LOG.warn("No #finder specified") if !finder @header = @document.gets.strip if options[:includes_header] parser = Ingestor.parser_for( options[:parser] ).new(self, @document) parser.options( options[:parser_options] ) unless options[:sample] parser.process! else parser.sample! end self end |
#working_directory ⇒ Object
19 20 21 |
# File 'lib/ingestor/proxy.rb', line 19 def working_directory options[:working_directory] end |