Class: Librr::Indexer

Inherits:
Object
  • Object
show all
Includes:
Logger::ClassLogger
Defined in:
lib/librr/indexer.rb

Defined Under Namespace

Modules: SolrManager Classes: SolrOutHandler

Constant Summary collapse

SLICE_NUM =
300

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Logger::ClassLogger

#debug, #info, #logger

Instance Attribute Details

#solr_started ⇒ Object

Returns the value of attribute solr_started.



14
15
16
# File 'lib/librr/indexer.rb', line 14

# Reader for the @solr_started instance variable.
#
# NOTE(review): after_start assigns true to @solr_started; no other writer
# is visible here, so the value is presumably nil before that — confirm.
#
# @return [Object] the current value of @solr_started
def solr_started
  @solr_started
end

Class Method Details

.pid_file ⇒ Object



18
19
20
# File 'lib/librr/indexer.rb', line 18

# Location of the Solr pid file, as resolved by Settings.in_dir('solr.pid').
# #start hands this value to kill_process_by_file before launching Solr.
#
# NOTE(review): Settings.in_dir is defined elsewhere; presumably it resolves
# the name inside the application's settings directory — confirm.
#
# @return [Object] whatever Settings.in_dir returns for 'solr.pid'
def self.pid_file
  Settings.in_dir('solr.pid')
end

Instance Method Details

#after_start ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/librr/indexer.rb', line 72

# Hook meant to run after Solr has started (the caller is not visible here).
# Marks the indexer as started, opens the RSolr connection against the local
# Solr port from Settings, then fires the block stored by #start, if any.
#
# @return [Object, nil] result of the stored after-block, or nil without one
def after_start
  @solr_started = true
  self.debug 'after solr start'

  solr_url = "http://localhost:#{Settings.solr_port}/solr"
  @solr = RSolr.connect(url: solr_url, read_timeout: 10, open_timeout: 10)

  callback = @after_block
  callback.call if callback
end

#cleanup ⇒ Object



92
93
94
95
96
97
98
# File 'lib/librr/indexer.rb', line 92

# Wipes the whole index: issues a delete for the match-all query '*:*' and
# commits, both inside run_solr so read timeouts are retried.
#
# @return [Object, nil] whatever run_solr returns for the block
def cleanup
  self.debug 'cleanup'
  self.run_solr do
    @solr.delete_by_query('*:*')
    @solr.commit
  end
end

#index_directory(dir) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/librr/indexer.rb', line 100

# Indexes every regular file found recursively under +dir+.
#
# Entries are walked with EM::Iterator. The iterator is advanced explicitly:
# immediately for non-files, and from index_file's completion block for
# files, so the next entry only starts after the current one reports back.
#
# @param dir [String] directory whose contents should be indexed
# @return [Object] whatever EM::Iterator#each returns
def index_directory(dir)
  self.debug "index dir: #{dir}"

  entries = Dir.glob(File.join(dir, "**/*"))

  visit = proc do |path, iterator|
    # Directories and other non-files are skipped straight away.
    next iterator.next unless File.file?(path)

    self.index_file(path) { iterator.next }
  end
  finished = proc { self.debug "index dir finished: #{dir}" }

  EM::Iterator.new(entries).each(visit, finished)
end

#index_file(file, &block) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/librr/indexer.rb', line 124

# Re-indexes a single file, one Solr document per line.
#
# Steps:
# 1. Files whose basename matches Settings.escape_files are skipped.
# 2. Existing documents for the file are deleted (and committed).
# 3. If the file no longer exists, that deletion is all that happens.
# 4. Otherwise the file is read in slices of SLICE_NUM lines; every line
#    becomes a document with a fresh UUID id, the filename, a 1-based line
#    number and the line text (passed through fix_encoding, trailing
#    whitespace stripped). Each slice is added and committed separately.
#
# The optional block is a completion callback and is invoked on EVERY path,
# including the skip path: index_directory advances its iterator only from
# this callback, so a skipped file that stayed silent would stall the walk.
#
# NOTE(review): DelayIterator is defined elsewhere; it is assumed to call the
# first proc per enumerator element and the second once finished — confirm.
#
# @param file [String] path of the file to (re)index
# @yield called with no arguments once handling of the file is finished
# @return [Object, nil] nil on the skip / missing-file paths, otherwise
#   whatever DelayIterator#each returns
def index_file(file, &block)
  if File.basename(file) =~ Settings.escape_files
    # Still signal completion so callers waiting on the callback can proceed.
    block.call if block
    return
  end

  self.run_solr {
    @solr.delete_by_query "filename:#{file}"
    @solr.commit
  }

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(file)
    self.debug "remove index file: #{file}"
    block.call if block
    return
  end

  self.debug "index file: #{file}"
  # Not the block form of File.open: the handle must outlive this method and
  # is closed from the completion proc below.
  f = File.open(file)
  enum = f.each.each_slice(SLICE_NUM).each_with_index
  DelayIterator.new(enum)
    .each(
     proc { |lines, i|
            data = lines.each_with_index.map do |line, j|
              # i is the slice index, j the offset inside it; +1 makes it 1-based.
              num = SLICE_NUM * i + j + 1
              line = fix_encoding(line).rstrip
              {id: SecureRandom.uuid, filename: file, linenum: num, line: line}
            end

            self.run_solr {
              @solr.add data
              @solr.commit
            }

            self.debug "working on lines: #{i*SLICE_NUM}"
          },
     proc {
            f.close
            block.call if block
          }
     )
end

#remove_index_directory(dir) ⇒ Object



116
117
118
119
120
121
122
# File 'lib/librr/indexer.rb', line 116

# Drops indexed documents whose filename matches the query "filename:<dir>*"
# (dir followed by a wildcard), then commits. Runs inside run_solr so read
# timeouts are retried.
#
# @param dir [String] directory path used as the filename prefix
# @return [Object, nil] whatever run_solr returns for the block
def remove_index_directory(dir)
  self.debug "remove dir: #{dir}"
  self.run_solr do
    @solr.delete_by_query("filename:#{dir}*")
    @solr.commit
  end
end

#run_solr(&block) ⇒ Object



82
83
84
85
86
87
88
89
90
# File 'lib/librr/indexer.rb', line 82

# Runs the given block, retrying when it raises Net::ReadTimeout.
#
# The block is attempted at most three times (one initial try plus two
# retries). Any other exception propagates immediately.
#
# @yield the Solr operation to perform
# @return [Object, nil] the block's value from the first attempt that does
#   not time out, or nil when all three attempts timed out
def run_solr &block
  3.times do
    begin
      return block.call
    rescue Net::ReadTimeout
      next
    end
  end
  nil
end

#search(str, opts = {}) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/librr/indexer.rb', line 164

# Searches indexed lines and returns the matching rows.
#
# The Solr query is "line:<str>", with " filename:<location>*" appended when
# a location is given. The text is interpolated into the query as-is.
#
# @param str [String] text to look for in the `line` field
# @param opts [Hash] optional settings
# @option opts [Integer] :rows maximum number of rows (default 30)
# @option opts [Boolean] :all when truthy, request 2**31 - 1 rows instead
# @option opts [String] :location filename prefix to add to the query
# @return [Array<Array>] one flattened [filename, linenum, line] entry per
#   document; empty when Solr produced no usable response
def search(str, opts={})
  self.debug "search: #{str}"

  # :all wins over :rows; 2**31 - 1 acts as "effectively unlimited".
  rows = opts[:all] ? (2 ** 31 - 1) : (opts[:rows] || 30)
  query = "line:#{str}"
  query += " filename:#{opts[:location]}*" if opts[:location]

  result = self.run_solr {
    @solr.get 'select', params: {q: query, rows: rows}
  }

  # run_solr yields nil once every attempt has timed out; indexing into that
  # (or into a reply without a 'response' section) used to raise
  # NoMethodError. Treat both as "no hits".
  docs = result && result['response'] && result['response']['docs']
  unless docs
    self.debug "search got no response: #{str}"
    return []
  end

  docs.map do |row|
    [row['filename'], row['linenum'], row['line']].flatten
  end
end

#start(&after_block) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/librr/indexer.rb', line 22

# Launches Solr as a child process and wires up its output handling.
#
# The given block is stored in @after_block (after_start invokes it later).
# Any process recorded in the pid file is killed first; then, from inside
# the ./solr directory, `java -jar start.jar` is spawned through EM.popen
# with SolrManager as handler, and SolrOutHandler is attached to the third
# stream returned by redirect_std.
#
# NOTE(review): redirect_std and kill_process_by_file are defined elsewhere;
# redirect_std presumably returns [stdin, stdout, stderr] ends — confirm.
#
# @yield stored and run once Solr has started
# @return [Object] whatever EM.attach returns
def start &after_block
  @after_block = after_block

  kill_process_by_file(self.class.pid_file)

  solr_home = File.join(Dir.pwd, 'solr')
  Dir.chdir(solr_home) do
    command = 'java -jar start.jar'
    _solr_in, _solr_out, solr_err = redirect_std do
      EM.popen(command, SolrManager)
      # TODO: write pid file
    end
    EM.attach(solr_err, SolrOutHandler, self)
  end
end