Class: RSI::Dictionary

Inherits:
Object
  • Object
show all
Includes:
Loggable
Defined in:
lib/rsi/dictionary.rb

Constant Summary collapse

META_FILE =
"meta.yaml"
TERMS_FILE =
"terms.list"
TERMGROUP_FILE =
"termgroup.list"
@@termgroup_loading =
50

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Loggable

#logger

Constructor Details

#initialize(root) ⇒ Dictionary

Returns a new instance of Dictionary.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/rsi/dictionary.rb', line 36

# Sets up an empty in-memory dictionary rooted at +root+.
#
# root:: value stored as @root; term data is addressed under
#        File.join( root, "terms" ).
def initialize( root )
  @root       = root
  @serializer = NativeSerializer.new
  @terms_root = File.join( root, "terms" )

  # Lookup tables, all initially empty.
  @terms           = {}  # term => id
  @entries         = {}  # termid => [TermEntry...]
  @pending_entries = {}  # of termid => 1
  @termgroups      = {}  # termid -> termgroupid

  # Counters persisted with the dictionary; every one starts at zero.
  @meta = {
    :next_termid          => 0,
    :next_termgroup_id    => 0,
    :next_termgroup_count => 0
  }
end

Instance Attribute Details

#rootObject (readonly)

Returns the value of attribute root.



28
29
30
# File 'lib/rsi/dictionary.rb', line 28

# Reader for @root, which #initialize sets from its +root+ argument.
def root; @root; end

#serializerObject

Returns the value of attribute serializer.



27
28
29
# File 'lib/rsi/dictionary.rb', line 27

# Reader for @serializer (initially the NativeSerializer built in #initialize).
def serializer; @serializer; end

#termsObject

Returns the value of attribute terms.



27
28
29
# File 'lib/rsi/dictionary.rb', line 27

# Reader for @terms, the term => id table (initially an empty Hash).
def terms; @terms; end

Instance Method Details

#add_term_entries(docid, termid, pos_list = [0]) ⇒ Object



106
107
108
109
110
111
# File 'lib/rsi/dictionary.rb', line 106

# Builds a TermEntry for +docid+ and hands it to add_entry under +termid+.
#
# docid::    passed to TermEntry.new
# termid::   passed through to add_entry
# pos_list:: stored on the entry as pos_list; its length becomes the
#            entry's freq (default: [0], i.e. freq 1)
#
# Returns whatever add_entry returns.
def add_term_entries( docid, termid, pos_list=[0] )
  entry = TermEntry.new( docid )
  entry.pos_list = pos_list
  entry.freq     = pos_list.length
  add_entry( termid, entry )
end

#get_entry_list(termid) ⇒ Object

Gets the list of entries for the given termid, creating the entry list if it doesn’t already exist. Returns a list of TermEntries.



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/rsi/dictionary.rb', line 116

# Returns the entry list for +termid+, loading or creating it on demand.
#
# When +termid+ is not yet in @entries:
# 1. a termgroup id is looked up in @termgroups (a fresh one is taken
#    from next_termgroup_id if the term has none);
# 2. if "<terms_root>/<termgroup id>.tg" exists, it is deserialized and
#    every (termid, entries) pair it contains is copied into @entries;
# 3. if +termid+ is still missing, an empty list is registered for it.
#
# termid:: key into @entries / @termgroups
#
# Returns the list stored in @entries for +termid+ (the same object on
# repeated calls).
def get_entry_list( termid )
  logger.debug( "[termid #{termid}]" )
  unless @entries.has_key?( termid )
    logger.debug( "  No entry[#{termid}]" )
    unless @termgroups.has_key?( termid )
      logger.debug( "  No termgroups[#{termid}]"  )
      @termgroups[ termid ] = next_termgroup_id()
    end
    id = @termgroups[ termid ]
    logger.debug( "  Termgroup id=#{id}" )
    tg_fn = File.join( @terms_root, "#{id}.tg" )
    logger.debug( "            fn=#{tg_fn}" )
    # File.exist? replaces FileTest.exists?, which no longer exists in
    # current Ruby releases.
    if File.exist?( tg_fn )
      logger.debug( "  Reloading termgroup record #{tg_fn}" )
      # Block form closes the file even if deserialization raises.
      tg = File.open( tg_fn, "r" ) { |tg_f| @serializer.load( tg_f ) }
      tg.each do |tid, term_entries|
        @entries[tid] = term_entries
      end
    end
    # The termgroup file may be absent, or may not mention this termid.
    unless @entries.has_key?( termid )
      logger.debug( "  Creating termgroup record" )
      @entries[termid] = []
    end
  end
  logger.debug( "[returning #{@entries[termid]}]" )
  return @entries[termid]
end

#get_termid_for(term, create = false) ⇒ Object

Get the termid for the given (tokenized) term. If create is true (the default is false) and the given term has not previously been added to the dictionary, a new id will be created and returned; if create is false, nil is returned for an unknown term.



97
98
99
100
101
102
103
104
# File 'lib/rsi/dictionary.rb', line 97

# Looks up the id registered for +term+ in @terms.
#
# term::   key to look up
# create:: when true, an unknown term is assigned the next id from
#          next_termid and that id is returned (default: false)
#
# Returns the term's id, or nil when the term is unknown and +create+
# is false.
def get_termid_for( term, create=false )
  return @terms[term] if @terms.has_key?( term )
  return nil unless create
  @terms[term] = self.next_termid()
end

#has_term?(term) ⇒ Boolean

Returns:

  • (Boolean)


90
91
92
# File 'lib/rsi/dictionary.rb', line 90

# True when +term+ is a key of @terms, false otherwise.
def has_term?( term )
  @terms.key?( term )
end

#openObject



51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/rsi/dictionary.rb', line 51

# Opens the dictionary stored under @root.
#
# Creates the @root directory if nothing exists at that path, then tries
# to reload the existing store. If reload raises a StandardError, the
# error is logged and a new store is created via create_store instead.
# Sets @opened to true on completion.
def open()
  # File.exist? replaces FileTest.exists?, which no longer exists in
  # current Ruby releases.
  Dir.mkdir( @root ) unless File.exist?( @root )
  logger.info( "Opening dictionary" )
  begin
    self.reload()
  rescue => e
    # Any reload failure is treated as "no store yet".
    logger.debug( e )
    logger.info( "DB does not exist (#{e}), creating..." )
    self.create_store()
  end
  @opened = true
end

#storeObject

Serialize the current state of the dictionary. (Currently this requires time at least proportional to the full size of the dictionary. This is a bug.)



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rsi/dictionary.rb', line 67

# Writes the dictionary's current state under @root.
#
# Three files are written, in this order, before store_term_entries is
# called:
# * META_FILE      - @meta (with the serializer added under :serializer), as YAML
# * TERMS_FILE     - @terms, via the serializer
# * TERMGROUP_FILE - @termgroups, via the serializer
#
# Returns whatever store_term_entries returns.
def store()
  logger.info( "Storing at #{@root}" )

  # Meta info, stored as YAML; the serializer itself is recorded in it.
  @meta[ :serializer ] = @serializer
  meta_fn = File.join( @root, META_FILE )
  File.open( meta_fn, "w" ) { |out| YAML.dump( @meta, out ) }

  # term => id table
  term_fn = File.join( @root, TERMS_FILE )
  logger.debug( "Storing terms to #{term_fn}" )
  File.open( term_fn, "w" ) do |out|
    logger.debug( "terms=#{@terms}" )
    @serializer.dump( @terms, out )
  end

  # termid => termgroupid table
  termgroup_fn = File.join( @root, TERMGROUP_FILE )
  File.open( termgroup_fn, "w" ) { |out| @serializer.dump( @termgroups, out ) }

  store_term_entries()
end