Class: DistributedTrie::Trie
- Inherits:
-
Object
- Object
- DistributedTrie::Trie
- Defined in:
- lib/distributedtrie/trie.rb
Instance Method Summary collapse
- #_createTree(key) ⇒ Object
- #_getInternal(type = :work) ⇒ Object
- #_getNextLetters(node) ⇒ Object
- #_mergeIndex(indexStr) ⇒ Object
- #_searchWith(key, &block) ⇒ Object
- #addKey!(key) ⇒ Object
- #cancel ⇒ Object
- #commit! ⇒ Object
- #commonPrefixSearch(key) ⇒ Object
- #deleteKey!(key) ⇒ Object
- #exactMatchSearch(key) ⇒ Object
-
#fuzzySearch(searchWord, threshold = 0.90) ⇒ Object
return: [ [distance, keyword], [distance, keyword], … ].
-
#initialize(kvsif, prefixString) ⇒ Trie
constructor
kvsif …
- #listChilds(key) ⇒ Object
- #rangeSearch(from, to) ⇒ Object
- #search(entryNode, &block) ⇒ Object
Constructor Details
#initialize(kvsif, prefixString) ⇒ Trie
kvsif … Please implement like DistributedTrie::KvsIF class and specify instance of it.
41 42 43 44 45 46 |
# File 'lib/distributedtrie/trie.rb', line 41 def initialize( kvsif, prefixString ) @kvsif = kvsif @req = Hash.new @prefixString = prefixString @key_hash = Hash.new end |
Instance Method Details
#_createTree(key) ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/distributedtrie/trie.rb', line 193 def _createTree( key ) h = Hash.new str = '' key.split( // ).each { |c| val = if str.size == (key.size-1) c + '$' else c end h [ str ] = val str += c } h.keys.each{ |key| if not @key_hash.has_key?( key ) @key_hash[ key ] = '' end @key_hash[ key ] += ' ' + h[ key ] @key_hash[key] = _mergeIndex( @key_hash[key] ) } @key_hash end |
#_getInternal(type = :work) ⇒ Object
216 217 218 |
# File 'lib/distributedtrie/trie.rb', line 216 def _getInternal( type = :work ) @key_hash end |
#_getNextLetters(node) ⇒ Object
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/distributedtrie/trie.rb', line 152 def _getNextLetters( node ) str = @kvsif.get( @prefixString + node ) if str term = [] nonTerm = [] str.split( /[ ]+/ ).each { |x| case x.size when 1 nonTerm << x when 2 term << x[0...1] end } [ term, nonTerm ] else [ [], [] ] end end |
#_mergeIndex(indexStr) ⇒ Object
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/distributedtrie/trie.rb', line 171 def _mergeIndex( indexStr ) # "a$ a" => "a$" # merge into terminal # " a$" => "a$" # strip spaces # "a$ b" => "a$ b" # alredy merged h = Hash.new term = Array.new nonTerm = Array.new indexStr.split( /[ ]+/ ).each {|entry| case entry.size when 1 nonTerm << entry when 2 term << entry[0...1] else end } arr = term.uniq.map{ |x| x + '$' } arr += nonTerm.uniq.reject { |x| term.include?( x ) } arr.join( ' ' ) end |
#_searchWith(key, &block) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/distributedtrie/trie.rb', line 95 def _searchWith( key, &block ) result = [] (term, nonTerm) = _getNextLetters( key ) term.each { |x| arg = key + x #pp [ "_check(1)", arg ] if block.call( arg, true ) #pp [ '_match(1)', key, x ] result += _searchWith( key + x, &block ) result << arg elsif block.call( arg, false ) #pp [ '_match(2)', key, x ] result += _searchWith( key + x, &block ) end } nonTerm.each { |x| arg = key + x #pp [ "_check(3)", arg ] if block.call( arg, false ) #pp [ '_match(3)', key, x ] result += _searchWith( key + x, &block ) end } result end |
#addKey!(key) ⇒ Object
48 49 50 |
# File 'lib/distributedtrie/trie.rb', line 48 def addKey!( key ) _createTree( key ) end |
#cancel ⇒ Object
63 64 65 |
# File 'lib/distributedtrie/trie.rb', line 63 def cancel() @key_hash = Hash.new end |
#commit! ⇒ Object
55 56 57 58 59 60 61 |
# File 'lib/distributedtrie/trie.rb', line 55 def commit!() @key_hash.each_key { |key| cur = @kvsif.get( @prefixString + key, "" ) @kvsif.put!( @prefixString + key, _mergeIndex( cur + " " + @key_hash[ key ] )) } @key_hash = Hash.new end |
#commonPrefixSearch(key) ⇒ Object
80 81 82 83 |
# File 'lib/distributedtrie/trie.rb', line 80 def commonPrefixSearch( key ) result = exactMatchSearch( key ) result += listChilds( key ) end |
#deleteKey!(key) ⇒ Object
52 53 |
# File 'lib/distributedtrie/trie.rb', line 52 def deleteKey!( key ) end |
#exactMatchSearch(key) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/distributedtrie/trie.rb', line 85 def exactMatchSearch( key ) (term, nonTerm) = _getNextLetters( key[0...(key.size-1)] ) #pp [ "exactMatchSearch", key, key[0...(key.size-1)], term, nonTerm ] if term.include?( key[-1] ) [key] else [] end end |
#fuzzySearch(searchWord, threshold = 0.90) ⇒ Object
return: [ [distance, keyword], [distance, keyword], … ]
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/distributedtrie/trie.rb', line 134 def fuzzySearch( searchWord, threshold = 0.90 ) jarow = FuzzyStringMatch::JaroWinkler.create( ) result = search( '' ) { |x,termFlag| _word = searchWord if not termFlag and (x.size < searchWord.size) _word = searchWord[0...x.size] (searchWord.size-x.size).times { |i| _word += ' ' x += ' ' # pp [ "non terminal: ", i, x, _word ] } end result = jarow.getDistance( x, _word ) threshold <= result } result.map { |k| [ jarow.getDistance( searchWord, k ), k ] }.sort_by {|item| 1.0 - item[0]} end |
#listChilds(key) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/distributedtrie/trie.rb', line 67 def listChilds( key ) result = [] (term, nonTerm) = _getNextLetters( key ) #pp [ "searchChilds", key, term, nonTerm ] term.each { |x| result << key + x } (term + nonTerm).each { |x| result += listChilds( key + x ) } result end |
#rangeSearch(from, to) ⇒ Object
125 126 127 128 129 130 131 |
# File 'lib/distributedtrie/trie.rb', line 125 def rangeSearch( from, to ) search( '' ) { |x,termFlag| _from = from[0...x.size] _to = to [0...x.size] ( _from <= x ) && ( x <= _to ) } end |
#search(entryNode, &block) ⇒ Object
121 122 123 |
# File 'lib/distributedtrie/trie.rb', line 121 def search( entryNode, &block ) _searchWith( entryNode, &block ) end |