Class: KnowledgeBase

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/knowledge_base.rb,
lib/rbbt/knowledge_base/query.rb,
lib/rbbt/knowledge_base/entity.rb,
lib/rbbt/knowledge_base/registry.rb,
lib/rbbt/knowledge_base/traverse.rb,
lib/rbbt/knowledge_base/syndicate.rb,
lib/rbbt/knowledge_base/enrichment.rb

Defined Under Namespace

Classes: Traverser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dir, namespace = nil) ⇒ KnowledgeBase

Returns a new instance of KnowledgeBase.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/rbbt/knowledge_base.rb', line 11

# Builds an empty knowledge base rooted at +dir+.
#
# dir       - location of the knowledge base; it is duplicated before being
#             handed to Path.setup, so the caller's object is left untouched.
# namespace - optional default namespace, stored as given (nil when omitted).
def initialize(dir, namespace = nil)
  @dir = Path.setup(dir.dup)

  @namespace = namespace
  @format = IndiferentHash.setup({})

  # `||=` keeps a registry that was already assigned before this ran.
  @registry ||= IndiferentHash.setup({})
  @entity_options = IndiferentHash.setup({})

  @indices = IndiferentHash.setup({})
  @identifiers = IndiferentHash.setup({})
  @fields = {}
  @descriptions = {}
  # A plain Hash was already the effective value: the previous code assigned
  # an IndiferentHash first and then immediately overwrote it with `{}`.
  @databases = {}
end

Instance Attribute Details

#databases ⇒ Object

Returns the value of attribute databases.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @databases, the Hash in which get_database memoizes the
# databases it has opened.
def databases
  @databases
end

#dir ⇒ Object

Returns the value of attribute dir.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @dir, the location this knowledge base was created with.
def dir
  @dir
end

#entity_options ⇒ Object

Returns the value of attribute entity_options.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @entity_options, the per-entity-type option hashes consulted by
# entity_options_for, get_database and get_index.
def entity_options
  @entity_options
end

#format ⇒ Object

Returns the value of attribute format.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @format, the mapping from entity type to preferred format.
# NOTE: inside this class the name hides Kernel#format.
def format
  @format
end

#indices ⇒ Object

Returns the value of attribute indices.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @indices, the Hash in which get_index memoizes the indices it
# has opened.
def indices
  @indices
end

#namespace ⇒ Object

Returns the value of attribute namespace.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @namespace, the default namespace given to the constructor
# (nil when none was supplied).
def namespace
  @namespace
end

#registry ⇒ Object

Returns the value of attribute registry.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for @registry, which maps each registered name to the
# [file_or_block, options] pair stored by register.
def registry
  @registry
end

Class Method Details

.load(dir) ⇒ Object



28
29
30
# File 'lib/rbbt/knowledge_base.rb', line 28

# Convenience constructor: returns a KnowledgeBase created for +dir+ with no
# namespace argument.
def self.load(dir)
  KnowledgeBase.new(dir)
end

Instance Method Details

#_children(name, entity) ⇒ Object



55
56
57
58
# File 'lib/rbbt/knowledge_base/query.rb', line 55

# Raw lookup: returns whatever the index for +name+ yields from +match+ for
# +entity+. No identifier translation or result setup is applied here.
def _children(name, entity)
  get_index(name).match(entity)
end

#_neighbours(name, entity) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/rbbt/knowledge_base/query.rb', line 77

# Raw neighbourhood of +entity+ in database +name+.
#
# When the database is flagged undirected and its source and target fields are
# the same, only :children is returned; otherwise both :parents and :children
# are returned, in that order.
def _neighbours(name, entity)
  symmetric = undirected(name) && source(name) == target(name)
  return {:children => _children(name, entity)} if symmetric

  {:parents => _parents(name, entity), :children => _children(name, entity)}
end

#_parents(name, entity) ⇒ Object



65
66
67
68
# File 'lib/rbbt/knowledge_base/query.rb', line 65

# Raw lookup in the opposite direction: matches +entity+ against the reversed
# index for +name+. No identifier translation or result setup is applied.
def _parents(name, entity)
  reversed = get_index(name).reverse
  reversed.match(entity)
end

#_subset(name, source = :all, target = :all, options = {}) ⇒ Object



5
6
7
8
9
# File 'lib/rbbt/knowledge_base/query.rb', line 5

# Raw subset query: opens the index for +name+ with +options+ and asks it for
# the associations between +source+ and +target+ (both default to :all).
def _subset(name, source = :all, target = :all, options = {})
  get_index(name, options).subset(source, target)
end

#all(name, options = {}) ⇒ Object



50
51
52
53
# File 'lib/rbbt/knowledge_base/query.rb', line 50

# Returns every key of the index for +name+, passed through #setup so the
# result is associated with this knowledge base and database name.
def all(name, options={})
  index = get_index(name, options)
  setup(name, index.keys)
end

#all_databases ⇒ Object



17
18
19
# File 'lib/rbbt/knowledge_base/registry.rb', line 17

# Names of every database currently present in the registry.
def all_databases
  @registry.keys
end

#annotate(entities, type, database = nil) ⇒ Object



39
40
41
42
43
# File 'lib/rbbt/knowledge_base/entity.rb', line 39

# Prepares +entities+ as entities of +type+ via Misc.prepare_entity.
#
# The format is the one configured for +type+ in @format, falling back to
# +type+ itself; the options come from entity_options_for(type, database).
def annotate(entities, type, database = nil)
  target_format = @format[type] || type
  Misc.prepare_entity(entities, target_format, entity_options_for(type, database))
end

#children(name, entity) ⇒ Object



60
61
62
63
# File 'lib/rbbt/knowledge_base/query.rb', line 60

# Associations leaving +entity+ in database +name+. The entity is first
# translated with identify_source, and the matches are passed through #setup.
def children(name, entity)
  resolved = identify_source(name, entity)
  setup(name, _children(name, resolved))
end

#db_namespace(name) ⇒ Object



73
74
75
# File 'lib/rbbt/knowledge_base/entity.rb', line 73

# Namespace reported by the database registered under +name+.
def db_namespace(name)
  database = get_database(name)
  database.namespace
end

#description(name) ⇒ Object



25
26
27
# File 'lib/rbbt/knowledge_base/registry.rb', line 25

# Splits the key field of the index for +name+ on "~" and memoizes the parts
# in @descriptions. #source, #target and #undirected read positions 0, 1 and 2.
def description(name)
  @descriptions[name] ||= begin
    key_field = get_index(name).key_field
    key_field.split("~")
  end
end

#enrichment(name, entities, options = {}) ⇒ Object



3
4
5
6
7
8
# File 'lib/rbbt/knowledge_base/enrichment.rb', line 3

# Runs the database's own +enrichment+ for +entities+ against its first field,
# with persistence disabled.
#
# The statistics library is required here, on first use, rather than at file
# load time. +entities+ are translated with identify_source beforehand.
def enrichment(name, entities, options = {})
  require 'rbbt/statistics/hypergeometric'
  db = get_database(name, options)
  source_ids = identify_source(name, entities)
  db.enrichment(source_ids, db.fields.first, :persist => false)
end

#entities ⇒ Object



61
62
63
# File 'lib/rbbt/knowledge_base/entity.rb', line 61

# Distinct source and target fields across all registered databases, in
# first-seen order (source before target for each database).
def entities
  all_databases.flat_map { |name| [source(name), target(name)] }.uniq
end

#entity_options_for(type, database_name = nil) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/rbbt/knowledge_base/entity.rb', line 22

# Computes the option hash to use for entities of +type+.
#
# Layers, later ones overriding earlier ones:
#   1. :organism, taken from the knowledge base namespace or, when that is nil
#      and +database_name+ is given, from the database's namespace;
#   2. the options registered in #entity_options under the type name or under
#      the type that Entity.formats maps it to;
#   3. :format from @format when one is configured for +type+;
#   4. the database's own entity options for the type, when +database_name+
#      is given and the database has any.
#
# Returns a new Hash. The hashes stored in #entity_options are not modified.
def entity_options_for(type, database_name = nil)
  entity_options = self.entity_options
  IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
  registered = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}

  # Work on a copy: assigning :format straight into +registered+ would write
  # into the hash stored in #entity_options. `clone` (rather than `dup`) keeps
  # any module the stored hash was extended with.
  options = registered.clone
  options[:format] = @format[type] if Hash === @format && @format.include?(type)

  namespace = self.namespace
  namespace = db_namespace(database_name) if namespace.nil? and database_name
  options = {:organism => namespace}.merge(options)

  if database_name
    db_entity_options = get_database(database_name).entity_options
    specific = db_entity_options && (db_entity_options[type] || db_entity_options[Entity.formats[type.to_s].to_s])
    options = options.merge(specific) if specific
  end

  options
end

#entity_types ⇒ Object



65
66
67
# File 'lib/rbbt/knowledge_base/entity.rb', line 65

# Distinct values that Entity.formats yields for the fields listed by
# #entities (nil is included when a field has no entry).
def entity_types
  types = entities.map { |field| Entity.formats[field] }
  types.uniq
end

#fields(name) ⇒ Object



21
22
23
# File 'lib/rbbt/knowledge_base/registry.rb', line 21

# Fields of the index for +name+, memoized per name in @fields.
def fields(name)
  @fields[name] ||= begin
    index = get_index(name)
    index.fields
  end
end

#get_database(name, options = {}) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/rbbt/knowledge_base/registry.rb', line 93

# Opens (or re-opens from its persistence file) the association database
# registered under +name+ and memoizes it.
#
# name    - registered database name (converted to a String).
# options - Hash of options; it is duplicated, so the caller's hash is never
#           modified. A :namespace / :organism equal to the knowledge base
#           namespace is dropped so it does not alter the persistence key.
#
# Returns the object produced by Association.open; its namespace is set to
# the knowledge base namespace when one is defined.
# Raises RuntimeError when there is nothing to re-open and +name+ has no
# registered file.
def get_database(name, options = {})
  name = name.to_s

  options = options.dup
  if self.namespace == options[:namespace]
    options.delete(:namespace) 
  end
  if self.namespace == options[:organism]
    options.delete(:organism) 
  end

  # Snapshot the memo key before +options+ is modified below. Using the live
  # hash inside the key meant the entry was stored under the mutated key and
  # was never found again by later calls with the same arguments.
  memo_key = [name, options.dup]
  @databases[memo_key] ||= 
    begin 
      # The persistence key is derived from the options *before* the default
      # namespace is filled in, so default requests map to the plain name.
      if options.empty?
        key = name.to_s
      else
        fp = Misc.hash2md5(options)
        key = name.to_s + "_" + fp
      end

      options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?

      key += '.database'
      Persist.memory("Database:" << [key, dir] * "@") do
        options = options.dup

        persist_dir = dir
        persist_file = persist_dir[key].find
        file, registered_options = registry[name]

        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :format => format, :persist => true

        # Fill in knowledge-base-wide entity options as defaults per type.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        persist_options = Misc.pull_keys options, :persist

        database = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                     Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                     Association.open(file, options, persist_options)
                   else
                     options = Misc.add_defaults options, registered_options if registered_options
                     raise "Repo #{ name } not found and not registered" if file.nil?
                     Log.medium "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                     Association.open(file, options, persist_options)
                   end

        database.namespace = self.namespace if self.namespace

        database
      end
    end
end

#get_index(name, options = {}) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/knowledge_base/registry.rb', line 41

# Opens (or re-opens from its persistence file) the association index
# registered under +name+ and memoizes it in @indices.
#
# name    - registered database name (converted to a String).
# options - Hash of options. It is duplicated before the default
#           :organism / :namespace are filled in, so the caller's hash is not
#           modified. options[:key] overrides the persistence key; the value
#           :name selects the plain database name.
#
# Returns the object produced by Association.index; its namespace is set to
# the knowledge base namespace when one is defined.
# Raises RuntimeError when there is nothing to re-open and +name+ has no
# registered file.
def get_index(name, options = {})
  name = name.to_s
  # Copy first: the defaulting below used to write into the caller's hash.
  options = options.dup
  options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
  @indices[[name, options]] ||= 
    begin 
      if options.empty?
        key = name.to_s
      elsif options[:key]
        key = options[:key]
        key = name if key == :name
      else
        fp = Misc.hash2md5(options)
        key = name.to_s + "_" + fp
      end

      Persist.memory("Index:" << [key, dir] * "@") do
        options = options.dup

        persist_dir = dir
        persist_file = persist_dir[key].find
        file, registered_options = registry[name]

        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :format => format, :persist => true

        # Fill in knowledge-base-wide entity options as defaults per type.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        persist_options = Misc.pull_keys options, :persist

        index = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                  Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                  Association.index(file, options, persist_options.dup)
                else
                  options = Misc.add_defaults options, registered_options if registered_options
                  raise "Repo #{ name } not found and not registered" if file.nil?
                  Log.medium "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                  Association.index(file, options, persist_options.dup)
                end

        # Same rule as get_database. The previous `unless self.namespace`
        # only ran when the namespace was nil, so it could only assign nil.
        index.namespace = self.namespace if self.namespace

        index
      end
    end
end

#identifier_files(name) ⇒ Object



69
70
71
# File 'lib/rbbt/knowledge_base/entity.rb', line 69

# A copy of the identifier file list reported by the database for +name+, so
# callers may modify the result without touching the database's own list.
def identifier_files(name)
  files = get_database(name).identifier_files
  files.dup
end

#identify(name, entity) ⇒ Object



116
117
118
# File 'lib/rbbt/knowledge_base/entity.rb', line 116

# Translates +entity+ using the source index for +name+; when that yields
# nil or false, falls back to the target index.
def identify(name, entity)
  from_source = identify_source(name, entity)
  return from_source if from_source

  identify_target(name, entity)
end

#identify_source(name, entity) ⇒ Object



101
102
103
104
105
106
# File 'lib/rbbt/knowledge_base/entity.rb', line 101

# Translates +entity+ (a single value or an Array) through the source
# translation index for +name+.
#
# :all is returned untouched, and so is +entity+ when no index is available.
# Arrays are translated element-wise with +values_at+.
def identify_source(name, entity)
  return :all if entity == :all

  translation = source_index(name)
  return entity if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#identify_target(name, entity) ⇒ Object



109
110
111
112
113
114
# File 'lib/rbbt/knowledge_base/entity.rb', line 109

# Translates +entity+ (a single value or an Array) through the target
# translation index for +name+.
#
# :all is returned untouched, and so is +entity+ when no index is available.
# Arrays are translated element-wise with +values_at+.
def identify_target(name, entity)
  return :all if entity == :all

  translation = target_index(name)
  return entity if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#index_fields(name) ⇒ Object



154
155
156
# File 'lib/rbbt/knowledge_base/registry.rb', line 154

# Fields of the index for +name+, read from the index on every call (unlike
# #fields, nothing is memoized here).
def index_fields(name)
  index = get_index(name)
  index.fields
end

#info(name) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/rbbt/knowledge_base/registry.rb', line 163

# Summary Hash describing database +name+: source/target fields and types,
# the entity options for each side, the index fields and whether the
# database is flagged 'undirected'.
def info(name)
  src = self.source(name)
  tgt = self.target(name)
  src_type = self.source_type(name)
  tgt_type = self.target_type(name)
  field_names = self.fields(name)

  {
    :source => src,
    :target => tgt,
    :source_type => src_type,
    :target_type => tgt_type,
    :source_entity_options => self.entity_options_for(src_type, name),
    :target_entity_options => self.entity_options_for(tgt_type, name),
    :fields => field_names,
    # Strict comparison: only the literal flag 'undirected' counts.
    :undirected => self.undirected(name) == 'undirected',
  }
end

#neighbours(name, entity) ⇒ Object



85
86
87
88
89
90
91
# File 'lib/rbbt/knowledge_base/query.rb', line 85

# Neighbourhood of +entity+ in database +name+ as a Hash with :children and,
# where applicable, :parents. Each list present is passed through #setup;
# parents are set up with the reverse flag.
def neighbours(name, entity)
  related = _neighbours(name, entity)
  IndiferentHash.setup(related)

  outgoing = related[:children]
  setup(name, outgoing) if outgoing

  incoming = related[:parents]
  setup(name, incoming, true) if incoming

  related
end

#parents(name, entity) ⇒ Object



70
71
72
73
74
75
# File 'lib/rbbt/knowledge_base/query.rb', line 70

# Associations arriving at +entity+ in database +name+. The entity is first
# translated with identify_target, and the matches are passed through #setup
# with the reverse flag.
def parents(name, entity)
  resolved = identify_target(name, entity)
  found = _parents(name, resolved)
  # Disabled in the original:
  #matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
  setup(name, found, true)
end

#produce(name, *rest, &block) ⇒ Object



158
159
160
161
# File 'lib/rbbt/knowledge_base/registry.rb', line 158

# Registers +name+ (forwarding the remaining arguments and the block to
# #register unchanged) and immediately returns its index.
def produce(name, *rest, &block)
  register(name, *rest, &block)

  get_index(name)
end

#register(name, file = nil, options = {}, &block) ⇒ Object



6
7
8
9
10
11
12
13
14
15
# File 'lib/rbbt/knowledge_base/registry.rb', line 6

# Records how database +name+ is produced: either from +file+ or, when a
# block is given, from that block (the block takes precedence over +file+).
#
# A block is given a singleton +filename+ method returning the name as a
# String. The registry entry is the pair [file_or_block, options], which is
# also the return value.
def register(name, file = nil, options = {}, &block)
  origin =
    if block_given?
      block.define_singleton_method(:filename) { name.to_s }
      Log.debug("Registering #{ name } from code block")
      block
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file } #{Misc.fingerprint options}")
      file
    end

  @registry[name] = [origin, options]
end

#select_entities(name, entities, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/knowledge_base/entity.rb', line 6

# Picks the source and target entity lists for database +name+ out of the
# +entities+ Hash.
#
# For each side the first non-nil/false entry wins, looked up under:
# :source / :target, the index's field name, the entity type that
# Entity.formats maps that field to (as a String), and finally :both.
#
# Returns [source_entities, target_entities]; either may be nil.
def select_entities(name, entities, options = {})
  index = get_index(name, options)

  source_field = index.source_field
  target_field = index.target_field

  # Resolved once and reused below; previously these were assigned, never
  # read, and the same lookup was repeated inline.
  source_type = Entity.formats[source_field]
  target_type = Entity.formats[target_field]

  source_entities = entities[:source] || entities[source_field] || entities[source_type.to_s] || entities[:both]
  target_entities = entities[:target] || entities[target_field] || entities[target_type.to_s] || entities[:both]

  [source_entities, target_entities]
end

#setup(name, matches, reverse = false) ⇒ Object



32
33
34
# File 'lib/rbbt/knowledge_base.rb', line 32

# Hands +matches+ to AssociationItem.setup together with this knowledge base,
# the database +name+ and the +reverse+ flag, and returns its result.
def setup(name, matches, reverse = false)
  AssociationItem.setup(matches, self, name, reverse)
end

#source(name) ⇒ Object



29
30
31
# File 'lib/rbbt/knowledge_base/registry.rb', line 29

# First component of the description of +name+, i.e. its source field.
def source(name)
  parts = description(name)
  parts[0]
end

#source_index(name) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/rbbt/knowledge_base/entity.rb', line 77

# Builds the translation index used to convert identifiers into the source
# field of database +name+. The result is cached by Persist.memory under a
# key made of the database name and the knowledge base directory.
def source_index(name)
  Persist.memory("Source index #{name}: KB directory #{dir}") do
    files = identifier_files(name)
    files.concat Entity.identifier_files(source(name)) if defined? Entity
    files.uniq!

    # Resolve the NAMESPACE placeholder with the knowledge base namespace or,
    # failing that, with the database's own.
    if namespace
      files.collect! { |f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace)) }
    elsif db_namespace(name)
      files.collect! { |f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name))) }
    end

    # Files whose placeholder could not be resolved are dropped.
    files.reject! { |f| f.match(/\bNAMESPACE\b/) }
    TSV.translation_index files, source(name), nil, :persist => true
  end
end

#source_type(name) ⇒ Object



53
54
55
# File 'lib/rbbt/knowledge_base/entity.rb', line 53

# Entity type that Entity.formats associates with the source field of +name+.
def source_type(name)
  field = source(name)
  Entity.formats[field]
end

#subset(name, entities, options = {}, &block) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/rbbt/knowledge_base/query.rb', line 11

# Returns the associations of database +name+ restricted to +entities+.
#
# name     - database name.
# entities - :all, an AnnotatedArray (keyed by its format or base entity), or
#            a Hash as understood by select_entities. May be nil when the
#            entity Hash is passed in the +options+ position instead.
# options  - Hash forwarded to select_entities and _subset. The keys
#            :identify, :identify_source and :identify_target (read from
#            either +entities+ or +options+) request identifier translation.
# block    - optional filter applied to the matches with +select+.
#
# Returns [] when either side is nil or an empty Array; otherwise the matches
# after #setup, filtered by the block when one is given.
# Raises RuntimeError when +entities+ has an unsupported type.
def subset(name, entities, options = {}, &block)
  entities, options = options, entities if entities.nil? and Hash === options
  # After the swap above +options+ is nil; the helpers below index into it and
  # call +empty?+ on it, so normalise it to a Hash.
  options ||= {}

  entities = case entities
             when :all
               {:target => :all, :source => :all}
             when AnnotatedArray
               format = entities.format if entities.respond_to? :format 
               format ||= entities.base_entity.to_s
               {format => entities.clean_annotations}
             when Hash
               entities
             else
               raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
             end

  identify, identify_source, identify_target = entities.merge(options).values_at :identify, :identify_source, :identify_target

  source, target = select_entities(name, entities, options)
  
  source = identify_source(name, source) if identify_source
  target = identify_target(name, target) if identify_target

  # The generic :identify flag applies to any side without a specific flag.
  source = identify(name, source) if identify && !identify_source
  target = identify(name, target) if identify && !identify_target

  return [] if source.nil? or target.nil?
  return [] if Array === target and target.empty?
  return [] if Array === source and source.empty?

  matches = _subset name, source, target, options

  setup(name, matches)

  matches = matches.select(&block) if block_given? 

  matches
end

#syndicate(name, kb) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/rbbt/knowledge_base/syndicate.rb', line 2

# Registers every database of another knowledge base +kb+ in this one.
#
# name - suffix for the local names: each database is registered as
#        "<database>@<name>", or under its own name when +name+ is nil.
# kb   - the knowledge base to syndicate from.
#
# Each local entry is a block that fetches the database from +kb+ on demand.
# The registered options carry over :undirected and :entity_options from the
# remote registry entry, with +kb+'s own entity options as defaults beneath
# the database-specific ones.
def syndicate(name, kb)
  kb.all_databases.each do |database|
    if name.nil?
      db_name = database
    else
      db_name = [database, name] * "@"
    end
    file, kb_options = kb.registry[database]
    options = {}
    # Guard both reads: a registry entry may carry no options hash, and only
    # the :undirected read was protected before.
    if kb_options
      options[:entity_options] = kb_options[:entity_options]
      options[:undirected] = kb_options[:undirected]
    end
    if kb.entity_options
      options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
    end

    register(db_name, nil, options) do
      kb.get_database(database)
    end
  end
end

#target(name) ⇒ Object



33
34
35
# File 'lib/rbbt/knowledge_base/registry.rb', line 33

# Second component of the description of +name+, i.e. its target field.
def target(name)
  parts = description(name)
  parts[1]
end

#target_index(name) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/rbbt/knowledge_base/entity.rb', line 89

# Builds the translation index used to convert identifiers into the target
# field of database +name+. The result is cached by Persist.memory under a
# key made of the database name and the knowledge base directory.
def target_index(name)
  Persist.memory("Target index #{name}: KB directory #{dir}") do
    files = identifier_files(name)
    files.concat Entity.identifier_files(target(name)) if defined? Entity
    files.uniq!

    # Resolve the NAMESPACE placeholder with the knowledge base namespace or,
    # when that is nil, with the database's own.
    if self.namespace
      files.collect! { |f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace)) }
    elsif namespace.nil? && db_namespace(name)
      files.collect! { |f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name))) }
    end

    # Files whose placeholder could not be resolved are dropped.
    files.reject! { |f| f.match(/\bNAMESPACE\b/) }
    TSV.translation_index files, target(name), nil, :persist => true
  end
end

#target_type(name) ⇒ Object



57
58
59
# File 'lib/rbbt/knowledge_base/entity.rb', line 57

# Entity type that Entity.formats associates with the target field of +name+.
def target_type(name)
  field = target(name)
  Entity.formats[field]
end

#translate(entities, type) ⇒ Object



45
46
47
48
49
50
51
# File 'lib/rbbt/knowledge_base/entity.rb', line 45

# Converts +entities+ to the format configured for +type+ in @format.
#
# The conversion (+entities.to+) only happens when a format is configured,
# +entities+ exposes a +format+, and that format differs from the configured
# one. In every other case +entities+ is returned as is.
def translate(entities, type)
  wanted = @format[type]
  convertible = wanted && entities.respond_to?(:format) && wanted != entities.format

  convertible ? entities.to(wanted) : entities
end

#traverse(rules) ⇒ Object



310
311
312
313
# File 'lib/rbbt/knowledge_base/traverse.rb', line 310

# Runs +rules+ over this knowledge base with a fresh KnowledgeBase::Traverser
# and returns the result of its +traverse+.
def traverse(rules)
  KnowledgeBase::Traverser.new(self, rules).traverse
end

#undirected(name) ⇒ Object



37
38
39
# File 'lib/rbbt/knowledge_base/registry.rb', line 37

# Third component of the description of +name+; nil when the key field has
# only two parts.
def undirected(name)
  parts = description(name)
  parts[2]
end