Class: ClWiki::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/cl_wiki/index.rb

Constant Summary collapse

WAIT =
true

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(wiki_conf = $wiki_conf, fn = nil) ⇒ Indexer

Returns a new instance of Indexer.



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/cl_wiki/index.rb', line 23

# Creates the in-memory indexes (main, recent, pages and -- only when the
# configuration enables access_log_index -- hits) and then calls #load.
#
# wiki_conf:: configuration object; its +access_log_index+ and +wiki_path+
#             values are read here
# fn::        optional path of a file that status output is appended to
#             (see #do_puts); when nil, status text goes to standard output
def initialize(wiki_conf=$wiki_conf, fn=nil)
  @wiki_conf = wiki_conf
  @rootDir = @wiki_conf.wiki_path
  @fn = fn
  @record_hits = true

  @index, @recent, @pages = ClIndex.new, ClIndex.new, ClIndex.new
  @hits = ClIndex.new if @wiki_conf.access_log_index
  load
end

Instance Attribute Details

#index ⇒ Object (readonly)

Returns the value of attribute index.



15
16
17
# File 'lib/cl_wiki/index.rb', line 15

# Reader for the @index instance variable (the main index created in
# #initialize).
def index; @index; end

Class Method Details

.defaultPort ⇒ Object



19
20
21
# File 'lib/cl_wiki/index.rb', line 19

# Default port number, returned as a String.
# NOTE(review): nothing in this file shows what uses this port -- confirm
# against callers.
def self.defaultPort; '9111'; end

Instance Method Details

#add_hit(fullPageName) ⇒ Object



281
282
283
284
285
286
287
288
289
290
# File 'lib/cl_wiki/index.rb', line 281

# Records a hit on +fullPageName+ (stamped with the current time) in the
# hits index and then saves that index to #hits_filename.
#
# Does nothing and returns nil unless hit recording is switched on (#build
# switches it off while it runs) and the configuration enables
# access_log_index.
def add_hit(fullPageName)
  return unless @record_hits && @wiki_conf.access_log_index

  put_status('Hit on ' + fullPageName)
  @hits.add(fullPageName, Time.now, WAIT)
  # The save runs on its own thread; the configuration object decides how
  # (or whether) that thread is waited on.
  saver = Thread.new { @hits.save(hits_filename, WAIT) }
  @wiki_conf.wait_on_thread(saver)
end

#add_to_index(term, fullPageName) ⇒ Object



98
99
100
# File 'lib/cl_wiki/index.rb', line 98

# Adds the (+term+, +fullPageName+) pair to the main index and returns
# whatever the index's +add+ returns.
def add_to_index(term, fullPageName)
  @index.add term, fullPageName, WAIT
end

#add_to_pages(fullPageName) ⇒ Object



108
109
110
# File 'lib/cl_wiki/index.rb', line 108

# Adds +fullPageName+ to the pages index. The page name is the term and nil
# is stored as its value.
def add_to_pages(fullPageName)
  @pages.add fullPageName, nil, WAIT
end

#add_to_recent(modTime, fullPageName) ⇒ Object



102
103
104
105
106
# File 'lib/cl_wiki/index.rb', line 102

# Files +fullPageName+ under +modTime+ in the recent-changes index.
def add_to_recent(modTime, fullPageName)
  # Drop every earlier entry for this page first: only its current
  # modification time is needed.
  @recent.remove fullPageName, WAIT
  @recent.add modTime, fullPageName, WAIT
end

#build(limit = -1, purge = false) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/cl_wiki/index.rb', line 46

# Indexes the page files found (recursively) under @rootDir whose names end
# in $wikiPageExt, reporting progress through #do_puts.
#
# Hit recording is switched off while this runs and switched back on
# afterwards, even if indexing raises.
#
# limit:: maximum number of page files to index; -1 (the default) means
#         no limit
# purge:: passed through to #index_page
#
# Raises RuntimeError when $wikiPageExt is nil or empty.
def build(limit=-1, purge=false)
  @record_hits = false
  begin
    ext = $wikiPageExt
    # for debugging only, really
    raise '$wikiPageExt not set' if ext.nil? || ext.empty?

    # Escape and anchor the extension so that only the literal trailing
    # extension is stripped. Interpolated raw, the '.' would match any
    # character and the first such match anywhere in the name was removed
    # (e.g. '/atxtb.txt' became '/b.txt').
    ext_at_end = /#{Regexp.escape(ext)}\z/

    files = Dir[::File.join(@rootDir, '**/*' + ext)]
    progress = Progress.new(limit == -1 ? files.length : limit)
    progress.start

    fileCount = 0
    files.each do |fn|
      next if !::File.file?(fn)
      break if (limit > -1) && (fileCount >= limit)
      fileCount += 1
      # Page name = path relative to the wiki root, without the extension.
      fullName = fn.sub(@rootDir, '').sub(ext_at_end, '')
      index_page(fullName, purge)
      do_puts progress.progress(true)
    end
  ensure
    @record_hits = true
  end
end

#do_puts(text) ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/cl_wiki/index.rb', line 36

# Emits one line of status text: appended to the file named by @fn when a
# file name was given to the constructor, printed to standard output
# otherwise.
def do_puts(text)
  if @fn
    # Explicitly the top-level ::File, like the other file accesses in this
    # class; a bare File is looked up through the enclosing namespace first.
    # NOTE(review): presumably the ClWiki namespace has its own File class --
    # confirm.
    ::File.open(@fn, 'a+') { |f| f.puts text }
  else
    puts text
  end
end

#dump ⇒ Object



213
214
215
216
217
218
# File 'lib/cl_wiki/index.rb', line 213

# Writes text dumps of the main, recent and pages indexes via #dump_clindex,
# plus the hits index when the configuration enables access_log_index.
def dump
  [[@index, 'index'], [@recent, 'recent'], [@pages, 'pages']].each do |clindex, prefix|
    dump_clindex(clindex, prefix)
  end
  dump_clindex(@hits, 'hits') if @wiki_conf.access_log_index
end

#dump_clindex(aindex, fn_prefix) ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/cl_wiki/index.rb', line 194

# Writes two text files describing +aindex+:
#
# * <fn_prefix>.keys.dump.txt -- the index keys, sorted, one per line
# * <fn_prefix>.full.dump.txt -- one "key.inspect => value.inspect" line per
#   entry, sorted
#
# The file names are relative, so they are created in the current working
# directory.
#
# aindex::    object whose +index+ method returns the hash to dump
# fn_prefix:: prefix for the two file names; also shown in the status text
def dump_clindex(aindex, fn_prefix)
  put_status "Dumping #{fn_prefix}..." do
    hash = aindex.index
    ::File.open(fn_prefix + '.keys.dump.txt', 'w+') do |f|
      hash.keys.sort.each { |key| f.puts key }
    end
    # ::File here as well: the original mixed ::File and a bare File within
    # this one method; both files must be opened with the same core class.
    ::File.open(fn_prefix + '.full.dump.txt', 'w+') do |f|
      hash.to_a.sort.each do |key, value|
        f.puts key.inspect + " => " + value.inspect
      end
    end
  end
end

#hit_summary(start_index = 0, end_index = -1) ⇒ Object



292
293
294
295
296
297
298
299
300
# File 'lib/cl_wiki/index.rb', line 292

# Returns entries of the hits index as [key, value] pairs, ordered by
# descending value length (#add_hit adds one value per hit), sliced to
# start_index..end_index (inclusive; -1 means "through the last entry").
#
# Returns nil when the configuration does not enable access_log_index.
def hit_summary(start_index=0, end_index=-1)
  return unless @wiki_conf.access_log_index

  # Take a copy inside do_read, then sort the copy outside of it.
  snapshot = nil
  @hits.do_read(WAIT) { snapshot = @hits.index.dup }
  by_hit_count = snapshot.sort { |a, b| b[1].length <=> a[1].length }
  by_hit_count[start_index..end_index]
end

#hits_filename ⇒ Object



112
113
114
# File 'lib/cl_wiki/index.rb', line 112

# Absolute path of the hits index file: 'hits.dat' in the current working
# directory.
def hits_filename
  working_dir = ::File.expand_path('.')
  ::File.join(working_dir, 'hits.dat')
end

#index_filename ⇒ Object



116
117
118
# File 'lib/cl_wiki/index.rb', line 116

# Absolute path of the main index file: 'index.dat' in the current working
# directory.
def index_filename
  working_dir = ::File.expand_path('.')
  ::File.join(working_dir, 'index.dat')
end

#index_page(fullName, purge = false) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/cl_wiki/index.rb', line 72

# Indexes a single page, or -- when +purge+ is set and the page reports
# content_never_edited? -- deletes the page and removes it from the index
# instead.
#
# fullName:: page name; handed to ClWiki::Page.new together with @rootDir
# purge::    when true, never-edited pages are deleted rather than indexed
def index_page(fullName, purge=false)
  put_status "indexing #{fullName}" do
    pg = ClWiki::Page.new(fullName, @rootDir)
    pg.read_raw_content
    if purge && pg.content_never_edited?
      put_status("purging #{fullName}")
      pg.delete
      remove_page_from_index(fullName)
    else
      formatter = ClWiki::PageFormatter.new(pg.raw_content, fullName)
      # Every word yielded by formatLinks becomes an index term for this page.
      formatter.formatLinks do |word|
        if formatter.is_wiki_name?(word)
          # Wiki names are indexed in the form returned by expand_path.
          word = formatter.expand_path(word, fullName)
        else
          # Any other word is lower-cased in place.
          # NOTE(review): downcase! mutates the yielded string itself --
          # confirm whether formatLinks depends on that.
          word.downcase!
        end
        add_to_index(word, fullName)
      end
      # The page is also indexed under its own name and listed in the pages
      # index.
      add_to_index(fullName, fullName)
      add_to_pages(fullName)

      # File the page under the page object's mtime in the recent index.
      add_to_recent(pg.mtime, fullName)
    end
  end
end

#load ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/cl_wiki/index.rb', line 175

# Loads each index from its data file, skipping any file that does not
# exist. The hits index is only considered when the configuration enables
# access_log_index. Progress is reported through #put_status.
#
# Existence is checked with File.exist?; the File.exists? alias used
# previously is no longer available in current Ruby releases.
def load
  put_status 'Loading' do
    put_status 'Loading Main' do
      @index.load(index_filename, WAIT) if ::File.exist?(index_filename)
    end
    put_status 'Loading Recent' do
      @recent.load(recent_filename, WAIT) if ::File.exist?(recent_filename)
    end
    put_status 'Loading Pages' do
      @pages.load(pages_filename, WAIT) if ::File.exist?(pages_filename)
    end
    if @wiki_conf.access_log_index
      put_status 'Loading Hits' do
        @hits.load(hits_filename, WAIT) if ::File.exist?(hits_filename)
      end
    end
  end
end

#page_exists?(fullPageName) ⇒ Boolean

Returns:

  • (Boolean)


275
276
277
278
279
# File 'lib/cl_wiki/index.rb', line 275

# Asks the pages index whether +fullPageName+ is one of its terms and
# returns that answer unchanged.
def page_exists?(fullPageName)
  @pages.term_exists?(fullPageName, WAIT)
end

#pages_filename ⇒ Object



124
125
126
# File 'lib/cl_wiki/index.rb', line 124

# Absolute path of the pages index file: 'pages.dat' in the current working
# directory.
def pages_filename
  working_dir = ::File.expand_path('.')
  ::File.join(working_dir, 'pages.dat')
end

#pages_out(rootPage) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/cl_wiki/index.rb', line 261

# Returns the terms the main index reports for +rootPage+ (via all_terms),
# reduced to those that name an existing page.
#
# A term is dropped when it does not start with '/', when it is exactly '/'
# or '//', or when ClWiki::Page.page_exists? says there is no such page.
# The array returned by all_terms is filtered in place and returned.
def pages_out(rootPage)
  all = @index.all_terms(rootPage, WAIT)
  all.delete_if do |term|
    # Cheap string checks first; the page lookup only runs for terms that
    # look like page paths. The page class is referenced by its namespaced
    # name, ClWiki::Page, as in #index_page. The lookup gets a copy of the
    # term.
    (term[0..0] != '/') || (term == '/') || (term == '//') ||
      !ClWiki::Page.page_exists?(term.dup)
  end
  all
end

#put_status(status) ⇒ Object



133
134
135
136
137
138
139
140
141
# File 'lib/cl_wiki/index.rb', line 133

# Emits a time-stamped status line through #do_puts.
#
# Without a block: one line, "HH:MM:SS <status>".
# With a block: a "<status>... " line, then the block is run, then a
# "<status> done " line. The closing line is not written if the block
# raises.
#
# The stamp uses %I, i.e. a 12-hour clock with no AM/PM marker.
def put_status(status)
  # Built lazily so that each line carries the time at which it is emitted.
  stamped = lambda { |suffix| Time.now.strftime("%I:%M:%S") + ' ' + status + suffix }
  return do_puts(stamped.call('')) unless block_given?

  do_puts stamped.call('... ')
  yield
  do_puts stamped.call(' done ')
end

#recent(top = -1) ⇒ Object



254
255
256
257
258
259
# File 'lib/cl_wiki/index.rb', line 254

# Entries of the recent index as [key, value] pairs, sorted by key in
# descending order (#add_to_recent uses the modification time as the key)
# and sliced to 0..top. The value comes back through do_read.
#
# top:: index of the last entry to include, so top = 9 yields at most ten
#       entries and -1 (the default) yields all of them.
#       NOTE(review): if callers mean +top+ as a count, this returns one
#       entry more than asked for -- confirm.
def recent(top=-1)
  @recent.do_read(WAIT) do
    newest_first = @recent.index.sort { |a, b| b[0] <=> a[0] }
    newest_first[0..top]
  end
end

#recent_filename ⇒ Object



120
121
122
# File 'lib/cl_wiki/index.rb', line 120

# Absolute path of the recent-changes index file: 'recent.dat' in the
# current working directory.
def recent_filename
  working_dir = ::File.expand_path('.')
  ::File.join(working_dir, 'recent.dat')
end

#reindex_and_save_async(fullPageName) ⇒ Object



143
144
145
146
147
148
149
# File 'lib/cl_wiki/index.rb', line 143

# Re-indexes +fullPageName+ and saves all indexes on a new thread, then
# hands that thread to the configuration's wait_on_thread and returns its
# result.
def reindex_and_save_async(fullPageName)
  @wiki_conf.wait_on_thread(
    Thread.new do
      reindex_page(fullPageName)
      save
    end
  )
end

#reindex_page(fullPageName) ⇒ Object



151
152
153
154
155
156
# File 'lib/cl_wiki/index.rb', line 151

# Refreshes the index entries of one page: its existing entries are removed
# and the page is then indexed again, all inside a 'Reindexing ...' status
# block.
def reindex_page(fullPageName)
  put_status('Reindexing ' + fullPageName) do
    remove_page_from_index(fullPageName)
    index_page(fullPageName)
  end
end

#remove_page_from_index(fullPageName) ⇒ Object



128
129
130
131
# File 'lib/cl_wiki/index.rb', line 128

# Removes +fullPageName+ from the main index and from the recent index.
# The pages index is not touched here.
def remove_page_from_index(fullPageName)
  @index.remove fullPageName, WAIT
  @recent.remove fullPageName, WAIT
end

#save ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/cl_wiki/index.rb', line 158

# Saves every index to its data file, reporting progress through
# #put_status. The hits index is written only when the configuration
# enables access_log_index.
def save
  put_status 'Saving' do
    [
      ['Saving Main',   @index,  :index_filename],
      ['Saving Recent', @recent, :recent_filename],
      ['Saving Pages',  @pages,  :pages_filename]
    ].each do |label, clindex, filename_method|
      # The file name is resolved inside the status block, at save time.
      put_status(label) { clindex.save(send(filename_method), WAIT) }
    end
    # The 'Saving Hits' status lines are emitted whether or not hit logging
    # is enabled; only the save itself is conditional.
    put_status 'Saving Hits' do
      @hits.save(hits_filename, WAIT) if @wiki_conf.access_log_index
    end
  end
end

#search(text) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/cl_wiki/index.rb', line 220

# Looks up every space-separated term of +text+ in the main index and
# returns the hits common to all terms as a flat, duplicate-free, sorted
# array. Text without any terms yields an empty array.
#
# When $debug is set, the combined hits are printed (Kernel#p) before and
# after the final flatten/uniq/sort.
def search(text)
  per_term = text.split(' ').map do |term|
    found = []
    @index.search(term, found, WAIT)
    found.flatten
  end

  # A hit must match every term: intersect the per-term results. With no
  # terms at all there is nothing to intersect, hence the [] fallback.
  common = per_term.inject { |acc, term_hits| acc & term_hits } || []
  p common if $debug

  result = common.flatten.uniq.sort
  p result if $debug
  result
end

#sort_hits_by_recent(hits, top = -1) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
# File 'lib/cl_wiki/index.rb', line 242

# Groups +hits+ by the key under which #recent lists them and returns
# [date, matching_page_names] pairs, newest date first, sliced to 0..top
# (inclusive; -1 means all). Dates with no matching page are left out.
def sort_hits_by_recent(hits, top=-1)
  # +top+ is deliberately not passed to #recent: the full recent list is
  # needed first, it is then narrowed to the matching pages, and only after
  # that is the topmost part of the matching list taken.
  matches_by_date = recent.each_with_object({}) do |(date, page_names), acc|
    matching = page_names & hits
    acc[date] = matching unless matching.empty?
  end
  matches_by_date.sort { |a, b| b[0] <=> a[0] }[0..top]
end