Class: Cure::Extract::NamedRangeProcessor

Inherits:
BaseProcessor show all
Defined in:
lib/cure/extract/named_range_processor.rb

Instance Attribute Summary collapse

Attributes inherited from BaseProcessor

#database_service

Instance Method Summary collapse

Constructor Details

#initialize(database_service, candidate_nrs) ⇒ NamedRangeProcessor

Returns a new instance of NamedRangeProcessor.



17
18
19
20
21
22
23
# File 'lib/cure/extract/named_range_processor.rb', line 17

# Builds a processor for the given candidate named ranges.
#
# The candidates are stored first, then the row cache is built by
# +init_cache+, then the list of already-created tables starts out empty.
# The database service is handed to the parent constructor last.
#
# @param database_service [Object] forwarded unchanged to BaseProcessor
# @param candidate_nrs [Array<Extraction::NamedRange>] named ranges to extract
def initialize(database_service, candidate_nrs)
  @candidate_nrs = candidate_nrs
  @cache = init_cache

  @tables_created = []
  super(database_service)
end

Instance Attribute Details

#candidate_nrs ⇒ Array<Extraction::NamedRange> (readonly)

Returns named_ranges.

Returns:

  • named_ranges



15
16
17
# File 'lib/cure/extract/named_range_processor.rb', line 15

# Read-only accessor for the named ranges handed to the constructor.
#
# @return [Array<Extraction::NamedRange>] the candidate named ranges
def candidate_nrs
  @candidate_nrs
end

Instance Method Details

#after_process ⇒ Object



93
94
95
96
97
# File 'lib/cure/extract/named_range_processor.rb', line 93

# Flushes whatever is still sitting in the cache.
#
# Every cache entry that holds at least one row is passed (by its key) to
# +insert_cache+; entries with no rows are left untouched.
#
# @return [Object] the cache that was iterated
def after_process
  @cache.each do |range_name, pending_rows|
    next if pending_rows.size.zero?

    insert_cache(range_name)
  end
end

#calculate_row_bounds ⇒ Range

Returns:



106
107
108
109
# File 'lib/cure/extract/named_range_processor.rb', line 106

# Computes the span of rows touched by any candidate named range.
#
# Collects the +row_bounds+ of every candidate, flattens them into one list
# of positions and builds a range from the smallest to the largest position.
#
# @return [Range] lowest position..highest position across all candidates
def calculate_row_bounds
  lowest, highest = @candidate_nrs.map(&:row_bounds).flatten.minmax
  (lowest..highest)
end

#process_row(row_idx, csv_row) ⇒ Object

Parameters:



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/cure/extract/named_range_processor.rb', line 27

# Routes a single CSV row to every candidate named range that contains it.
#
# For each candidate whose bounds include +row_idx+ the row is handled as
# one of:
#   * filtered out - the range has a row filter and its :where proc rejects
#     the row; the range's +row_count+ is incremented and nothing is stored.
#   * header row   - the range's column slice is used as the column headers
#     (translated by the column filter when one is configured), the table is
#     created, every row currently cached for the range is inserted, and the
#     cache for that range is reset.
#   * content row  - the range's column slice is passed through the column
#     filter, prefixed with +row_idx+ and appended to the range's cache.
#     Once the table exists the cache is flushed whenever it reaches 10 rows.
#
# @param row_idx [Integer] position of the row being processed
# @param csv_row [Array] cell values of the row
# @return [Object] nil when the row is outside +row_bounds+, otherwise the
#   result of iterating the candidate named ranges
def process_row(row_idx, csv_row) # rubocop:disable all
  # Cheap rejection: the row is outside the span covered by all named ranges.
  return unless row_bounds.cover?(row_idx)

  @candidate_nrs.each do |nr|
    next unless nr.row_in_bounds?(row_idx)

    # Rows rejected by the row filter are only counted on the range.
    if nr.filter.row_handler.has_content? &&
       !nr.filter.row_handler.including_proc[:where].call(csv_row)
      nr.row_count += 1
      next
    end

    if nr.header_in_bounds?(nr.active_row_count(row_idx))
      headers = csv_row[nr.section[0]..nr.section[1]]

      if nr.filter.col_handler.has_content?
        nr.filter.col_handler.set_col_positions(headers)
        headers = nr.filter.col_handler.translate_headers(headers)
      end

      create_table(nr.name, headers)
      @tables_created << nr.name

      # Insert everything currently cached for this range, then start afresh.
      @cache[nr.name].each do |cached_row|
        insert_record(nr.name, nr.filter.col_handler.filter_row(cached_row))
      end
      @cache[nr.name] = []

      next
    end

    next unless nr.content_in_bounds?(row_idx)

    # Keep only the wanted columns and remember which source row this was.
    kept_cells = nr.filter.col_handler.filter_row(
      csv_row[nr.section[0]..nr.section[1]]
    )
    @cache[nr.name] << kept_cells.unshift(row_idx)

    # Without a table the rows just keep accumulating in the cache; with one,
    # the cache is written out as soon as it holds 10 rows.
    insert_cache(nr.name) if @tables_created.include?(nr.name) && @cache[nr.name].size >= 10
  end
end

#row_bounds ⇒ Range

This covers the max size of all named ranges

Returns:



101
102
103
# File 'lib/cure/extract/named_range_processor.rb', line 101

# Memoized span covering every candidate named range.
#
# The range is computed by +calculate_row_bounds+ the first time it is
# needed and the stored value is handed back on later calls.
#
# @return [Range] the overall row span
def row_bounds
  return @row_bounds if @row_bounds

  @row_bounds = calculate_row_bounds
end