Class: Elasticsearch::Rails::HA::ParallelIndexer
- Inherits:
-
Object
- Object
- Elasticsearch::Rails::HA::ParallelIndexer
- Includes:
- Term::ANSIColor
- Defined in:
- lib/elasticsearch/rails/ha/parallel_indexer.rb
Instance Attribute Summary collapse
-
#batch_size ⇒ Object
readonly
Returns the value of attribute batch_size.
-
#force ⇒ Object
readonly
Returns the value of attribute force.
-
#idx_name ⇒ Object
readonly
Returns the value of attribute idx_name.
-
#klass ⇒ Object
readonly
Returns the value of attribute klass.
-
#max ⇒ Object
readonly
Returns the value of attribute max.
-
#nprocs ⇒ Object
readonly
Returns the value of attribute nprocs.
-
#scope ⇒ Object
readonly
Returns the value of attribute scope.
-
#verbose ⇒ Object
readonly
Returns the value of attribute verbose.
Instance Method Summary collapse
- #blue_log(msg) ⇒ Object
-
#initialize(opts) ⇒ ParallelIndexer
constructor
Leverages multiple cores to run indexing in parallel.
- #process_child_results(results) ⇒ Object
- #red_log(msg) ⇒ Object
- #run ⇒ Object
- #run_child(start_at) ⇒ Object
Constructor Details
#initialize(opts) ⇒ ParallelIndexer
Leverages multiple cores to run indexing in parallel.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 23

# Leverages multiple cores to run indexing in parallel.
#
# Stores the options and derives how many records each worker should handle
# (+@pool_size+) from +klass.count+ divided by +nprocs+, rounded up.
#
# @param opts [Hash] options, read with symbol keys:
#   * +:klass+      (required) model to index; a String is resolved with +constantize+
#   * +:idx_name+   (required) index name handed to the index/import calls
#   * +:nprocs+     (required) number of worker pools; must be greater than zero
#   * +:batch_size+ (required) batch size handed to the import call
#   * +:max+        optional record cap checked by the workers
#   * +:force+      when truthy, #run recreates the index before forking
#   * +:verbose+    when truthy, progress messages are printed
#   * +:scope+      handed to the import call unchanged
# @raise [RuntimeError] if a required option is missing or +nprocs+ is not positive
def initialize(opts)
  @klass      = opts[:klass]      || fail("klass required")
  @idx_name   = opts[:idx_name]   || fail("idx_name required")
  @nprocs     = opts[:nprocs]     || fail("nprocs required")
  @batch_size = opts[:batch_size] || fail("batch_size required")
  @max        = opts[:max]
  @force      = opts[:force]
  @verbose    = opts[:verbose]
  @scope      = opts[:scope]

  # Zero would make the division below blow up with FloatDomainError and a
  # negative value would yield a negative pool size (so #run silently does
  # nothing); reject both up front with a clear message.
  fail "nprocs must be greater than zero" unless @nprocs.to_f > 0

  # make sure klass is not a simple string
  @klass = @klass.constantize if @klass.is_a?(String)

  # size of the slice of records each worker is expected to process
  @total_expected = klass.count
  @pool_size = (@total_expected / @nprocs.to_f).ceil
end
Instance Attribute Details
#batch_size ⇒ Object (readonly)
Returns the value of attribute batch_size.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@batch_size+ instance variable.
#
# @return [Object] the current value of +@batch_size+
def batch_size
  @batch_size
end
#force ⇒ Object (readonly)
Returns the value of attribute force.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@force+ instance variable.
#
# @return [Object] the current value of +@force+
def force
  @force
end
#idx_name ⇒ Object (readonly)
Returns the value of attribute idx_name.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@idx_name+ instance variable.
#
# @return [Object] the current value of +@idx_name+
def idx_name
  @idx_name
end
#klass ⇒ Object (readonly)
Returns the value of attribute klass.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@klass+ instance variable.
#
# @return [Object] the current value of +@klass+
def klass
  @klass
end
#max ⇒ Object (readonly)
Returns the value of attribute max.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@max+ instance variable.
#
# @return [Object] the current value of +@max+
def max
  @max
end
#nprocs ⇒ Object (readonly)
Returns the value of attribute nprocs.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@nprocs+ instance variable.
#
# @return [Object] the current value of +@nprocs+
def nprocs
  @nprocs
end
#scope ⇒ Object (readonly)
Returns the value of attribute scope.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@scope+ instance variable.
#
# @return [Object] the current value of +@scope+
def scope
  @scope
end
#verbose ⇒ Object (readonly)
Returns the value of attribute verbose.
12 13 14 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12

# Reader for the +@verbose+ instance variable.
#
# @return [Object] the current value of +@verbose+
def verbose
  @verbose
end
Instance Method Details
#blue_log(msg) ⇒ Object
14 15 16 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 14

# Passes +msg+ through the +blue+ helper in block form.
# NOTE(review): +blue+ is presumably supplied by the included
# Term::ANSIColor module — confirm.
#
# @param msg [Object] message to colorize
# @return [Object] whatever +blue+ returns for the yielded message
def blue_log(msg)
  blue { msg }
end
#process_child_results(results) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 91

# Checks the exit status of each child so we know whether the whole reindex
# has to fail.
#
# Diagnostics are printed with +puts+; the first child that was signaled or
# did not succeed aborts the loop with an exception.
#
# @param results [Array<Array>] +[pid, status]+ pairs; each status must respond
#   to +exited?+, +signaled?+, +success?+, +exitstatus+ and +termsig+
# @return [Array] +results+ when every child finished cleanly
# @raise [RuntimeError] for the first child that exited abnormally
def process_child_results(results)
  results.each do |pid, pstat|
    exit_ok = true

    if pstat.exited? && @verbose
      puts blue_log("PID #{pid} exited with #{pstat.exitstatus}")
    end

    if pstat.signaled?
      puts red_log(" >> #{pid} exited with uncaught signal #{pstat.termsig}")
      exit_ok = false
    end

    unless pstat.success?
      puts red_log(" >> #{pid} was not successful")
      exit_ok = false
    end

    # exitstatus is nil unless the child exited on its own, so only compare it
    # for exited children; otherwise a signaled child would additionally be
    # reported as having a "non-zero status".
    if pstat.exited? && pstat.exitstatus != 0
      puts red_log(" >> #{pid} exited with non-zero status")
      exit_ok = false
    end

    next if exit_ok

    raise red_log("PID #{pid} exited abnormally, so the whole reindex fails")
  end
end
#red_log(msg) ⇒ Object
18 19 20 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 18

# Passes +msg+ through the +red+ helper in block form.
# NOTE(review): +red+ is presumably supplied by the included
# Term::ANSIColor module — confirm.
#
# @param msg [Object] message to colorize
# @return [Object] whatever +red+ returns for the yielded message
def red_log(msg)
  red { msg }
end
#run ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 43

# Forks one worker per pool of records, waits for all of them and then
# validates their exit statuses.
#
# Does nothing when the pool size computed by the constructor is below one.
# When +@force+ is set the index is recreated before any worker is forked.
#
# @return [Object, nil] nil when there is nothing to do, otherwise the result
#   of #process_child_results
# @raise [RuntimeError] propagated from #process_child_results when a child
#   exited abnormally
def run
  return if @pool_size < 1

  # get all ids since we can't assume there are no holes in the PK sequencing
  ids = klass.order('id ASC').pluck(:id)
  # each worker starts at the first id of its slice
  offsets = ids.each_slice(@pool_size).map(&:first)

  if @verbose
    puts blue_log("Parallel Indexer: index=#{@idx_name} total=#{@total_expected} nprocs=#{@nprocs} pool_size=#{@pool_size} offsets=#{offsets} ")
  end

  if @force
    puts blue_log("Force creating new index") if @verbose
    klass.__elasticsearch__.create_index! force: true, index: idx_name
    klass.__elasticsearch__.refresh_index! index: idx_name
  end

  @current_db_config = ::ActiveRecord::Base.connection_config

  # IMPORTANT before forks in offsets loop
  ::ActiveRecord::Base.connection.disconnect!

  child_pids = offsets.map { |start_at| fork { run_child(start_at) } }.compact

  # reconnect in parent
  ::ActiveRecord::Base.establish_connection(@current_db_config)

  # Process.waitall seems to hang during tests. Do it manually.
  # wait2 hands back the [pid, status] pair directly, so $? is not needed.
  child_results = child_pids.map { |pid| Process.wait2(pid) }

  process_child_results(child_results)
end
#run_child(start_at) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 121

# Body of one forked worker: imports records beginning at +start_at+ and
# terminates the process (via +exit!+) once its pool has been handled or the
# optional +@max+ cap has been reached.
#
# NOTE(review): in the extracted listing the progress-bar variable name and
# the attribute that is assigned '=' were missing; +pbar+ and +bar_mark+ are
# reconstructions — confirm against the original file.
#
# @param start_at [Object] offset handed to the import call as +start:+
def run_child(start_at)
  # IMPORTANT after fork
  ::ActiveRecord::Base.establish_connection(@current_db_config)

  # IMPORTANT for tests to determine whether at_end should run
  ENV["I_AM_HA_CHILD"] = "true"

  completed = 0
  errors = []
  puts blue_log("Start worker #{$$} at offset #{start_at}") if @verbose

  # Best effort: if the progress bar cannot be built, fall back to printing
  # periodic checkpoint lines instead.
  pbar = begin
    ::ANSI::Progressbar.new("#{klass} [#{$$}]", @pool_size, STDOUT)
  rescue StandardError
    nil
  end

  if pbar
    pbar.format("#{klass} [#{$$}]: %3d%% %s %s", :percentage, :bar, :stat)
    pbar.__send__ :show
    pbar.bar_mark = '='
  end
  checkpoint = pbar.nil?

  @klass.__elasticsearch__.import(
    return: 'errors',
    index: @idx_name,
    start: start_at,
    scope: @scope,
    batch_size: @batch_size
  ) do |resp|
    items = resp['items']

    # show errors immediately (rather than buffering them)
    errors += items.select { |item| item.values.first['error'] }
    completed += items.size

    pbar.inc(items.size) if pbar && @verbose

    if checkpoint && @verbose
      puts blue_log("[#{$$}] #{Time.now.utc.iso8601} : #{completed} records completed")
    end

    STDERR.flush
    STDOUT.flush

    unless errors.empty?
      STDOUT.puts "ERRORS in #{$$}:"
      STDOUT.puts errors.pretty_inspect
    end

    # Compare with >= so the cap still stops the worker when a batch
    # overshoots it; a cap of zero never triggers, as before.
    max_records = @max.to_i if @max
    max_reached = max_records && max_records > 0 && completed >= max_records

    if completed >= @pool_size || max_reached
      pbar.finish if pbar
      puts blue_log("Worker #{$$} finished #{completed} records") if @verbose
      exit!(true) # exit child worker
    end
  end # end do |resp| block
end