Class: Cure::Extract::NamedRangeProcessor

Inherits:
BaseProcessor show all
Defined in:
lib/cure/extract/named_range_processor.rb

Instance Attribute Summary collapse

Attributes inherited from BaseProcessor

#database_service

Instance Method Summary collapse

Constructor Details

#initialize(database_service, candidate_nrs) ⇒ NamedRangeProcessor

Returns a new instance of NamedRangeProcessor.



17
18
19
20
21
22
23
# File 'lib/cure/extract/named_range_processor.rb', line 17

# Builds a processor for the given candidate named ranges.
#
# Instance state is prepared before control is handed to the parent
# constructor, which receives the database service.
#
# @param database_service [Object] forwarded unchanged to BaseProcessor
# @param candidate_nrs [Array<Extraction::NamedRange>] named ranges to process
def initialize(database_service, candidate_nrs)
  @candidate_nrs = candidate_nrs
  # init_cache is defined elsewhere in this class; it runs after
  # @candidate_nrs is assigned, exactly as before.
  @cache = init_cache

  # Names of the named ranges for which a table has already been created.
  @tables_created = []

  super(database_service)
end

Instance Attribute Details

#candidate_nrs ⇒ Array<Extraction::NamedRange> (readonly)

Returns named_ranges.

Returns:

  • (Array<Extraction::NamedRange>)

    named_ranges



15
16
17
# File 'lib/cure/extract/named_range_processor.rb', line 15

# Reader for the named ranges stored in @candidate_nrs.
#
# @return [Array<Extraction::NamedRange>] named_ranges
def candidate_nrs
  @candidate_nrs
end

Instance Method Details

#after_process ⇒ Object



93
94
95
96
97
# File 'lib/cure/extract/named_range_processor.rb', line 93

# Passes every named range that still has cached rows to insert_cache.
# Entries whose cache is empty are skipped.
#
# @return [Object] the value returned by @cache.each
def after_process
  @cache.each do |range_name, pending_rows|
    next if pending_rows.empty?

    insert_cache(range_name)
  end
end

#calculate_row_bounds ⇒ Range

Returns:

  • (Range)


106
107
108
109
# File 'lib/cure/extract/named_range_processor.rb', line 106

# Computes the smallest range spanning the row bounds of every candidate
# named range.
#
# @return [Range] from the lowest to the highest row position reported by
#   the candidates' row_bounds
def calculate_row_bounds
  lowest, highest = @candidate_nrs.map(&:row_bounds).flatten.minmax
  lowest..highest
end

#process_row(row_idx, csv_row) ⇒ Object

Parameters:

  • row_idx (Integer)
  • csv_row (Array)


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/cure/extract/named_range_processor.rb', line 27

# Offers one CSV row to every candidate named range whose row bounds
# include it.
#
# For each such named range the row ends up in one of three paths:
# - rejected by the range's row filter: only row_count is incremented;
# - header row: the table is created and rows cached so far are inserted;
# - content row: the filtered row (prefixed with row_idx) is cached, and the
#   cache is passed to insert_cache once the table exists and at least 10
#   rows are pending.
#
# @param row_idx [Integer]
# @param csv_row [Array]
# @return [Object, nil] nil when row_idx is outside #row_bounds, otherwise
#   the value returned by @candidate_nrs.each
def process_row(row_idx, csv_row) # rubocop:disable all
  # Cheap rejection: the row lies outside the span of all named ranges.
  return unless row_bounds.cover?(row_idx)

  @candidate_nrs.each do |nr|
    next unless nr.row_in_bounds?(row_idx)

    # A row filter, when configured, can reject the row outright. Rejected
    # rows are still counted on the named range.
    row_handler = nr.filter.row_handler
    if row_handler.has_content? && !row_handler.including_proc[:where].call(csv_row)
      nr.row_count += 1
      next
    end

    # Header row: create the table, then write out whatever was cached
    # before the table existed.
    if nr.header_in_bounds?(nr.active_row_count(row_idx))
      headers = csv_row[nr.section[0]..nr.section[1]]
      col_handler = nr.filter.col_handler

      if col_handler.has_content?
        col_handler.set_col_positions(headers)
        headers = col_handler.translate_headers(headers)
      end

      create_table(nr.name, headers)
      @tables_created << nr.name

      @cache[nr.name].each do |cached_row|
        insert_record(nr.name, nr.filter.col_handler.filter_row(cached_row))
      end

      @cache[nr.name] = [] # Evict cache
      next
    end

    next unless nr.content_in_bounds?(row_idx)

    # Content row: keep only this range's columns, let the column handler
    # filter them, and prefix the row index.
    record = nr.filter.col_handler
               .filter_row(csv_row[nr.section[0]..nr.section[1]])
               .unshift(row_idx)

    # Rows are always cached; the cache is only flushed once the table
    # exists and it has grown to 10 or more rows.
    pending = @cache[nr.name]
    pending << record
    insert_cache(nr.name) if @tables_created.include?(nr.name) && pending.size >= 10
  end
end

#row_bounds ⇒ Range

This covers the max size of all named ranges

Returns:

  • (Range)


101
102
103
# File 'lib/cure/extract/named_range_processor.rb', line 101

# Memoized span covering the row bounds of all named ranges. The value is
# computed by calculate_row_bounds on first use and reused afterwards.
#
# @return [Range]
def row_bounds
  return @row_bounds if @row_bounds

  @row_bounds = calculate_row_bounds
end