Class: ClWiki::Indexer
- Inherits:
-
Object
- Object
- ClWiki::Indexer
- Defined in:
- lib/cl_wiki/index.rb
Constant Summary collapse
- WAIT =
true
Instance Attribute Summary collapse
-
#index ⇒ Object
readonly
Returns the value of attribute index.
Class Method Summary collapse
Instance Method Summary collapse
- #add_hit(fullPageName) ⇒ Object
- #add_to_index(term, fullPageName) ⇒ Object
- #add_to_pages(fullPageName) ⇒ Object
- #add_to_recent(modTime, fullPageName) ⇒ Object
- #build(limit = -1, purge = false) ⇒ Object
- #do_puts(text) ⇒ Object
- #dump ⇒ Object
- #dump_clindex(aindex, fn_prefix) ⇒ Object
- #hit_summary(start_index = 0, end_index = -1) ⇒ Object
- #hits_filename ⇒ Object
- #index_filename ⇒ Object
- #index_page(fullName, purge = false) ⇒ Object
-
#initialize(wiki_conf = $wiki_conf, fn = nil) ⇒ Indexer
constructor
A new instance of Indexer.
- #load ⇒ Object
- #page_exists?(fullPageName) ⇒ Boolean
- #pages_filename ⇒ Object
- #pages_out(rootPage) ⇒ Object
- #put_status(status) ⇒ Object
- #recent(top = -1) ⇒ Object
- #recent_filename ⇒ Object
- #reindex_and_save_async(fullPageName) ⇒ Object
- #reindex_page(fullPageName) ⇒ Object
- #remove_page_from_index(fullPageName) ⇒ Object
- #save ⇒ Object
- #search(text) ⇒ Object
- #sort_hits_by_recent(hits, top = -1) ⇒ Object
Constructor Details
#initialize(wiki_conf = $wiki_conf, fn = nil) ⇒ Indexer
Returns a new instance of Indexer.
23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/cl_wiki/index.rb', line 23
# Creates an indexer for the wiki described by +wiki_conf+ and then calls
# #load to read any previously saved index files.
#
# wiki_conf - configuration object; #access_log_index and #wiki_path are
#             read from it here.
# fn        - optional file path; when set, status text is appended to that
#             file by #do_puts instead of being printed.
def initialize(wiki_conf = $wiki_conf, fn = nil)
  @wiki_conf = wiki_conf
  @index, @recent, @pages = ClIndex.new, ClIndex.new, ClIndex.new
  # The hits index is only created when access logging is switched on;
  # otherwise @hits stays unset.
  @hits = ClIndex.new if @wiki_conf.access_log_index
  @rootDir = @wiki_conf.wiki_path
  @fn = fn
  @record_hits = true
  load
end
Instance Attribute Details
#index ⇒ Object (readonly)
Returns the value of attribute index.
15 16 17 |
# File 'lib/cl_wiki/index.rb', line 15
# Read-only accessor: hands back the object stored in @index.
def index
  return @index
end
Class Method Details
.defaultPort ⇒ Object
19 20 21 |
# File 'lib/cl_wiki/index.rb', line 19
# Returns the default port as a String.
def self.defaultPort
  return '9111'
end
Instance Method Details
#add_hit(fullPageName) ⇒ Object
281 282 283 284 285 286 287 288 289 290 |
# File 'lib/cl_wiki/index.rb', line 281
# Records a hit on +fullPageName+ stamped with the current time, then saves
# the hits index from a new thread.
#
# Does nothing (returns nil) when hit recording is switched off (#build
# turns it off while it runs) or when the configuration has no access log
# index.
def add_hit(fullPageName)
  return unless @record_hits && @wiki_conf.access_log_index

  put_status('Hit on ' + fullPageName)
  @hits.add(fullPageName, Time.now, WAIT)
  saver = Thread.new { @hits.save(hits_filename, WAIT) }
  # The configuration object decides what to do with the saving thread.
  @wiki_conf.wait_on_thread(saver)
end
#add_to_index(term, fullPageName) ⇒ Object
98 99 100 |
# File 'lib/cl_wiki/index.rb', line 98
# Adds the pair (+term+, +fullPageName+) to the main index.
def add_to_index(term, fullPageName)
  main_index = @index
  main_index.add(term, fullPageName, WAIT)
end
#add_to_pages(fullPageName) ⇒ Object
108 109 110 |
# File 'lib/cl_wiki/index.rb', line 108
# Registers +fullPageName+ in the pages index. The page name is the term and
# nil is passed as the associated value.
def add_to_pages(fullPageName)
  page_index = @pages
  page_index.add(fullPageName, nil, WAIT)
end
#add_to_recent(modTime, fullPageName) ⇒ Object
102 103 104 105 106 |
# File 'lib/cl_wiki/index.rb', line 102
# Stores +fullPageName+ in the recent-changes index under +modTime+.
def add_to_recent(modTime, fullPageName)
  recent_index = @recent
  # Only the current modTime is needed, so drop every earlier entry for
  # this page before adding the new one.
  recent_index.remove(fullPageName, WAIT)
  recent_index.add(modTime, fullPageName, WAIT)
end
#build(limit = -1, purge = false) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/cl_wiki/index.rb', line 46
# Indexes the page files found under @rootDir.
#
# limit - maximum number of files to index; -1 (the default) means no limit.
# purge - passed through to #index_page for every page.
#
# Hit recording is switched off while the build runs and is always switched
# back on afterwards, even if indexing raises.
#
# Raises RuntimeError when $wikiPageExt is nil or empty.
def build(limit = -1, purge = false)
  @record_hits = false
  begin
    raise '$wikiPageExt not set' if $wikiPageExt.nil? || $wikiPageExt.empty?

    files = Dir[::File.join(@rootDir, '**/*' + $wikiPageExt)]
    progress = Progress.new(limit == -1 ? files.length : limit)
    progress.start

    # Match the extension literally and only at the very end of the path.
    # An unescaped, unanchored pattern would let '.' match any character and
    # could strip text from the middle of a page name.
    ext_pattern = /#{Regexp.escape($wikiPageExt)}\z/

    file_count = 0
    files.each do |fn|
      next unless ::File.file?(fn)
      break if (limit > -1) && (file_count >= limit)

      file_count += 1
      # Page name = path relative to the wiki root, without the extension.
      full_name = fn.sub(@rootDir, '').sub(ext_pattern, '')
      index_page(full_name, purge)
      do_puts progress.progress(true)
    end
  ensure
    @record_hits = true
  end
end
#do_puts(text) ⇒ Object
36 37 38 39 40 41 42 43 44 |
# File 'lib/cl_wiki/index.rb', line 36
# Emits one line of status text.
#
# When @fn is set, the text is appended to that file; otherwise it is
# printed with Kernel#puts.
def do_puts(text)
  if @fn
    # Use the top-level ::File explicitly, as the other methods in this file
    # do, so the lookup cannot pick up a constant named File in an enclosing
    # namespace.
    ::File.open(@fn, 'a+') { |f| f.puts text }
  else
    puts text
  end
end
#dump ⇒ Object
213 214 215 216 217 218 |
# File 'lib/cl_wiki/index.rb', line 213
# Dumps every index through #dump_clindex, using the prefixes 'index',
# 'recent', 'pages' and, when access logging is enabled, 'hits'.
def dump
  [[@index, 'index'], [@recent, 'recent'], [@pages, 'pages']].each do |clindex, prefix|
    dump_clindex(clindex, prefix)
  end
  dump_clindex(@hits, 'hits') if @wiki_conf.access_log_index
end
#dump_clindex(aindex, fn_prefix) ⇒ Object
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/cl_wiki/index.rb', line 194
# Writes two text dumps of +aindex+ into the current directory:
#
#   <fn_prefix>.keys.dump.txt - the sorted keys, one per line
#   <fn_prefix>.full.dump.txt - sorted "key.inspect => value.inspect" lines
#
# aindex    - object whose #index returns the hash to dump.
# fn_prefix - file name prefix for both dump files.
def dump_clindex(aindex, fn_prefix)
  put_status "Dumping #{fn_prefix}..." do
    hash = aindex.index

    # Both files are opened through ::File. The original mixed ::File and a
    # bare File, which can resolve to a different constant inside a namespace.
    ::File.open(fn_prefix + '.keys.dump.txt', 'w+') do |f|
      hash.keys.sort.each { |key| f.puts key }
    end

    ::File.open(fn_prefix + '.full.dump.txt', 'w+') do |f|
      hash.to_a.sort.each do |key, value|
        f.puts key.inspect + " => " + value.inspect
      end
    end
  end
end
#hit_summary(start_index = 0, end_index = -1) ⇒ Object
292 293 294 295 296 297 298 299 300 |
# File 'lib/cl_wiki/index.rb', line 292
# Returns the entries of the hits index ordered by descending length of each
# entry's value, sliced to start_index..end_index.
#
# Returns nil when the configuration has no access log index.
def hit_summary(start_index = 0, end_index = -1)
  return unless @wiki_conf.access_log_index

  snapshot = nil
  # Copy the hits hash inside the read block so sorting happens on the copy.
  @hits.do_read(WAIT) { snapshot = @hits.index.dup }
  ranked = snapshot.sort { |a, b| b[1].length <=> a[1].length }
  ranked[start_index..end_index]
end
#hits_filename ⇒ Object
112 113 114 |
# File 'lib/cl_wiki/index.rb', line 112
# Path of the hits index file: 'hits.dat' in the absolute form of the
# current working directory.
def hits_filename
  # The listing had lost the method name here (`::File.('.')`), which would
  # invoke File.call and fail; expand_path is restored.
  ::File.join(::File.expand_path('.'), 'hits.dat')
end
#index_filename ⇒ Object
116 117 118 |
# File 'lib/cl_wiki/index.rb', line 116
# Path of the main index file: 'index.dat' in the absolute form of the
# current working directory.
def index_filename
  # The listing had lost the method name here (`::File.('.')`), which would
  # invoke File.call and fail; expand_path is restored.
  ::File.join(::File.expand_path('.'), 'index.dat')
end
#index_page(fullName, purge = false) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/cl_wiki/index.rb', line 72
# Reads the page +fullName+ and adds it to the main, pages and recent
# indexes.
#
# fullName - full page name, passed to ClWiki::Page.new along with @rootDir.
# purge    - when true and the page reports content_never_edited?, the page
#            is deleted and removed from the index instead of being indexed.
def index_page(fullName, purge = false)
  put_status "indexing #{fullName}" do
    pg = ClWiki::Page.new(fullName, @rootDir)
    pg.read_raw_content
    if purge && pg.content_never_edited?
      put_status("purging #{fullName}")
      pg.delete
      remove_page_from_index(fullName)
    else
      formatter = ClWiki::PageFormatter.new(pg.raw_content, fullName)
      formatter.formatLinks do |word|
        if formatter.is_wiki_name?(word)
          # The listing had lost the method name (`formatter.(word, ...)`).
          # NOTE(review): expand_path is restored by analogy with the
          # identically garbled File calls in this file - confirm against
          # ClWiki::PageFormatter.
          word = formatter.expand_path(word, fullName)
        else
          word.downcase!
        end
        add_to_index(word, fullName)
      end
      # The page name itself is indexed as a term as well.
      add_to_index(fullName, fullName)
      add_to_pages(fullName)
      add_to_recent(pg.mtime, fullName)
    end
  end
end
#load ⇒ Object
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/cl_wiki/index.rb', line 175
# Loads each index from its data file, skipping any file that does not
# exist. The hits index is only loaded when access logging is enabled.
def load
  put_status 'Loading' do
    # File.exist? replaces File.exists?, which was deprecated and then
    # removed in Ruby 3.2.
    put_status 'Loading Main' do
      @index.load(index_filename, WAIT) if ::File.exist?(index_filename)
    end
    put_status 'Loading Recent' do
      @recent.load(recent_filename, WAIT) if ::File.exist?(recent_filename)
    end
    put_status 'Loading Pages' do
      @pages.load(pages_filename, WAIT) if ::File.exist?(pages_filename)
    end
    if @wiki_conf.access_log_index
      put_status 'Loading Hits' do
        @hits.load(hits_filename, WAIT) if ::File.exist?(hits_filename)
      end
    end
  end
end
#page_exists?(fullPageName) ⇒ Boolean
275 276 277 278 279 |
# File 'lib/cl_wiki/index.rb', line 275
# Returns whatever the pages index reports for term_exists? on
# +fullPageName+.
def page_exists?(fullPageName)
  # The original assigned `exists = false` and immediately overwrote it; the
  # dead store and the temporary are gone.
  @pages.term_exists?(fullPageName, WAIT)
end
#pages_filename ⇒ Object
124 125 126 |
# File 'lib/cl_wiki/index.rb', line 124
# Path of the pages index file: 'pages.dat' in the absolute form of the
# current working directory.
def pages_filename
  # The listing had lost the method name here (`::File.('.')`), which would
  # invoke File.call and fail; expand_path is restored.
  ::File.join(::File.expand_path('.'), 'pages.dat')
end
#pages_out(rootPage) ⇒ Object
261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/cl_wiki/index.rb', line 261
# Returns the terms the main index reports for +rootPage+ that name existing
# pages.
#
# A term is kept only if it starts with '/', is neither '/' nor '//', and
# the page class reports that the page exists.
def pages_out(rootPage)
  all = @index.all_terms(rootPage, WAIT)
  # First pass: discard anything that is not a page path.
  all.delete_if do |term|
    (term[0..0] != '/') || (term == '/') || (term == '//')
  end
  # Second pass: discard paths with no page behind them.
  # NOTE(review): the original referenced ClWikiPage here, while index_page
  # in this file uses ClWiki::Page; aligned to the namespaced name - confirm
  # that ClWiki::Page.page_exists? is the intended call.
  all.delete_if do |term|
    !ClWiki::Page.page_exists?(term.dup)
  end
  all
end
#put_status(status) ⇒ Object
133 134 135 136 137 138 139 140 141 |
# File 'lib/cl_wiki/index.rb', line 133
# Sends a time-stamped status line to #do_puts.
#
# Without a block, a single "<time> <status>" line is emitted. With a
# block, a "<status>... " line is emitted, the block is run, and a
# "<status> done " line follows. Each line carries its own fresh timestamp.
def put_status(status)
  stamped = ->(suffix) { Time.now.strftime("%I:%M:%S") + ' ' + status + suffix }
  return do_puts(stamped.call('')) unless block_given?

  do_puts stamped.call('... ')
  yield
  do_puts stamped.call(' done ')
end
#recent(top = -1) ⇒ Object
254 255 256 257 258 259 |
# File 'lib/cl_wiki/index.rb', line 254
# Sorts the entries of the recent index by key in descending order and takes
# the slice 0..top of the result, all inside the index's read block.
#
# top - last position to include; -1 (the default) keeps every entry.
def recent(top = -1)
  @recent.do_read(WAIT) do
    newest_first = @recent.index.sort { |a, b| b[0] <=> a[0] }
    newest_first[0..top]
  end
end
#recent_filename ⇒ Object
120 121 122 |
# File 'lib/cl_wiki/index.rb', line 120
# Path of the recent-changes index file: 'recent.dat' in the absolute form
# of the current working directory.
def recent_filename
  # The listing had lost the method name here (`::File.('.')`), which would
  # invoke File.call and fail; expand_path is restored.
  ::File.join(::File.expand_path('.'), 'recent.dat')
end
#reindex_and_save_async(fullPageName) ⇒ Object
143 144 145 146 147 148 149 |
# File 'lib/cl_wiki/index.rb', line 143
# Reindexes +fullPageName+ and saves all indexes from a new thread, then
# hands that thread to the configuration's wait_on_thread.
def reindex_and_save_async(fullPageName)
  worker = Thread.new {
    reindex_page(fullPageName)
    save
  }
  @wiki_conf.wait_on_thread(worker)
end
#reindex_page(fullPageName) ⇒ Object
151 152 153 154 155 156 |
# File 'lib/cl_wiki/index.rb', line 151
# Refreshes the index entries for +fullPageName+: the page is removed from
# the index and then indexed again, wrapped in a status message.
def reindex_page(fullPageName)
  label = 'Reindexing ' + fullPageName
  put_status(label) {
    remove_page_from_index(fullPageName)
    index_page(fullPageName)
  }
end
#remove_page_from_index(fullPageName) ⇒ Object
128 129 130 131 |
# File 'lib/cl_wiki/index.rb', line 128
# Removes +fullPageName+ from the main index and from the recent index.
# NOTE(review): the pages index is not touched here - confirm that this is
# intended for purged pages.
def remove_page_from_index(fullPageName)
  [@index, @recent].each { |clindex| clindex.remove(fullPageName, WAIT) }
  @recent.remove(fullPageName, WAIT)
end
#save ⇒ Object
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/cl_wiki/index.rb', line 158
# Saves each index to its data file, reporting progress via #put_status.
# The 'Saving Hits' status is always reported, but the hits index is only
# written when access logging is enabled.
def save
  put_status('Saving') {
    put_status('Saving Main') { @index.save(index_filename, WAIT) }
    put_status('Saving Recent') { @recent.save(recent_filename, WAIT) }
    put_status('Saving Pages') { @pages.save(pages_filename, WAIT) }
    put_status('Saving Hits') {
      @hits.save(hits_filename, WAIT) if @wiki_conf.access_log_index
    }
  }
end
#search(text) ⇒ Object
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/cl_wiki/index.rb', line 220
# Searches the main index for every space-separated term in +text+ and
# returns the sorted, de-duplicated hits common to all of the terms.
#
# Returns an empty array when +text+ contains no terms.
def search(text)
  matches = nil
  text.split(' ').each do |term|
    found = []
    # The index search appends its results to the array it is given.
    @index.search(term, found, WAIT)
    found.flatten!
    # The first term seeds the result; each later term narrows it.
    matches = matches ? (matches & found) : found
  end
  matches ||= []
  p matches if $debug
  matches.flatten!
  matches.uniq!
  matches.sort!
  p matches if $debug
  matches
end
#sort_hits_by_recent(hits, top = -1) ⇒ Object
242 243 244 245 246 247 248 249 250 251 252 |
# File 'lib/cl_wiki/index.rb', line 242
# Groups +hits+ by the keys of the recent list and returns the groups sorted
# by key in descending order, sliced to 0..top.
#
# hits - array of page names to keep.
# top  - last position to include; -1 (the default) keeps every group.
def sort_hits_by_recent(hits, top = -1)
  # Deliberately call #recent without +top+: the full recent list is
  # needed, it is then narrowed to the matching pages, and only after that
  # is the topmost slice taken.
  matched = recent.each_with_object({}) do |(date, page_names), by_date|
    overlap = page_names & hits
    by_date[date] = overlap unless overlap.empty?
  end
  matched.sort { |a, b| b[0] <=> a[0] }[0..top]
end