Class: Cure::Extract::NamedRangeProcessor
- Inherits:
-
BaseProcessor
- Object
- BaseProcessor
- Cure::Extract::NamedRangeProcessor
- Defined in:
- lib/cure/extract/named_range_processor.rb
Instance Attribute Summary collapse
-
#candidate_nrs ⇒ Array<Extraction::NamedRange>
readonly
The candidate named ranges handled by this processor.
Attributes inherited from BaseProcessor
Instance Method Summary collapse
- #after_process ⇒ Object
- #calculate_row_bounds ⇒ Range
-
#initialize(database_service, candidate_nrs) ⇒ NamedRangeProcessor
constructor
A new instance of NamedRangeProcessor.
- #process_row(row_idx, csv_row) ⇒ Object
-
#row_bounds ⇒ Range
The row range spanning the bounds of all candidate named ranges.
Constructor Details
#initialize(database_service, candidate_nrs) ⇒ NamedRangeProcessor
Returns a new instance of NamedRangeProcessor.
17 18 19 20 21 22 23 |
# File 'lib/cure/extract/named_range_processor.rb', line 17
# Creates a processor for a set of candidate named ranges.
#
# @param database_service [Object] passed straight through to BaseProcessor#initialize
# @param candidate_nrs [Array<Extraction::NamedRange>] named ranges to process
def initialize(database_service, candidate_nrs)
  @candidate_nrs = candidate_nrs
  # init_cache is defined elsewhere in the class; it runs after @candidate_nrs
  # is assigned (presumably because it reads it — confirm before reordering).
  @cache = init_cache
  # Names of named ranges whose table has been created so far.
  @tables_created = []

  super(database_service)
end
Instance Attribute Details
#candidate_nrs ⇒ Array<Extraction::NamedRange> (readonly)
Returns the candidate named ranges handled by this processor.
15 16 17 |
# File 'lib/cure/extract/named_range_processor.rb', line 15
# Reader for the candidate named ranges supplied to the constructor.
#
# @return [Array<Extraction::NamedRange>] the candidate named ranges
def candidate_nrs
  @candidate_nrs
end
Instance Method Details
#after_process ⇒ Object
93 94 95 96 97 |
# File 'lib/cure/extract/named_range_processor.rb', line 93
# Flushes whatever is still buffered once all rows have been processed.
#
# Every named range whose cache holds at least one row is handed to
# insert_cache; empty caches are skipped.
#
# @return [Object] the cache collection that was iterated
def after_process
  @cache.each_pair { |range_name, pending_rows| insert_cache(range_name) unless pending_rows.size.zero? }
end
#calculate_row_bounds ⇒ Range
106 107 108 109 |
# File 'lib/cure/extract/named_range_processor.rb', line 106
# Computes the row range that spans every candidate named range.
#
# The row_bounds of each candidate are flattened into a single list of
# positions (assumes mutually comparable values such as Integers —
# TODO confirm against NamedRange#row_bounds).
#
# @return [Range] lowest position .. highest position; both ends are nil
#   when there are no candidates
def calculate_row_bounds
  # minmax finds both extremes in one pass; sorting the whole list just to
  # read its first and last element is unnecessary work.
  lowest, highest = @candidate_nrs.map(&:row_bounds).flatten.minmax
  (lowest..highest)
end
#process_row(row_idx, csv_row) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/cure/extract/named_range_processor.rb', line 27
# Routes one CSV row to every candidate named range that contains it.
#
# For each candidate whose row bounds include row_idx:
# * a row rejected by the range's :where proc bumps nr.row_count and is skipped;
# * a header row creates the range's table and writes out any rows cached so far;
# * a content row is column-filtered, prefixed with row_idx and cached; the
#   cache is flushed via insert_cache once the table exists and it holds
#   10 or more rows.
#
# @param row_idx [Integer] position of the row in the source (assumed Integer — TODO confirm)
# @param csv_row [Array] cell values of the row
# @return [Object, nil] nil when the row is outside row_bounds, otherwise the
#   result of iterating the candidates
def process_row(row_idx, csv_row) # rubocop:disable all
  # Nothing to do when the row is outside every named range.
  return unless row_bounds.cover?(row_idx)

  @candidate_nrs.each do |nr|
    next unless nr.row_in_bounds?(row_idx)

    # Optional row filter: rows failing the :where proc are counted and dropped.
    row_handler = nr.filter.row_handler
    if row_handler.has_content? && !row_handler.including_proc[:where].call(csv_row)
      nr.row_count += 1
      next
    end

    col_handler = nr.filter.col_handler
    section = nr.section

    # Header row: resolve the column names, create the table, drain the cache.
    if nr.header_in_bounds?(nr.active_row_count(row_idx))
      column_headers = csv_row[section[0]..section[1]]

      if col_handler.has_content?
        col_handler.set_col_positions(column_headers)
        column_headers = col_handler.translate_headers(column_headers)
      end

      create_table(nr.name, column_headers)
      @tables_created << nr.name

      # NOTE(review): rows cached before the table existed were already passed
      # through filter_row and prefixed with row_idx; they are filtered a
      # second time here — confirm this is intended.
      @cache[nr.name].each do |cached_row|
        insert_record(nr.name, col_handler.filter_row(cached_row))
      end

      @cache[nr.name] = [] # Evict cache
      next
    end

    next unless nr.content_in_bounds?(row_idx)

    # Content row: keep only the wanted columns and buffer it. The row is
    # cached whether or not the table exists yet, so a single append replaces
    # the two identical appends the branches used to carry.
    filtered_row = col_handler.filter_row(csv_row[section[0]..section[1]])
    @cache[nr.name] << filtered_row.unshift(row_idx)

    # Only flush once the table exists; until then rows wait in the cache.
    insert_cache(nr.name) if @tables_created.include?(nr.name) && @cache[nr.name].size >= 10
  end
end
#row_bounds ⇒ Range
The row range spanning the bounds of all candidate named ranges.
101 102 103 |
# File 'lib/cure/extract/named_range_processor.rb', line 101
# Memoised row range spanning all candidate named ranges.
#
# The range is computed by calculate_row_bounds on first use and reused on
# later calls.
#
# @return [Range] the cached result of calculate_row_bounds
def row_bounds
  return @row_bounds if @row_bounds

  @row_bounds = calculate_row_bounds
end