Class: PatternRelExt
- Inherits: Object
- Hierarchy: Object → PatternRelExt
- Defined in: lib/rbbt/ner/patterns.rb
Instance Attribute Summary collapse
-
#token_trie ⇒ Object
Returns the value of attribute token_trie.
-
#type ⇒ Object
Returns the value of attribute type.
Class Method Summary collapse
- .prepare_chunk_patterns(token_trie, patterns, type = nil) ⇒ Object
- .simple_pattern(sentence, patterns, type = nil) ⇒ Object
- .transform_index(index) ⇒ Object
- .transform_key(key) ⇒ Object
Instance Method Summary collapse
-
#initialize(patterns, slack = nil, type = nil) ⇒ PatternRelExt
constructor
A new instance of PatternRelExt.
- #match_chunks(chunks) ⇒ Object
- #match_sentences(sentences) ⇒ Object
- #new_token_trie ⇒ Object
- #slack(slack) ⇒ Object
Constructor Details
#initialize(patterns, slack = nil, type = nil) ⇒ PatternRelExt
Returns a new instance of PatternRelExt.
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/rbbt/ner/patterns.rb', line 91

# Builds a relation extractor from a set of textual patterns.
#
# patterns - a Hash/TSV mapping relation names to lists of patterns, an
#            Array of patterns (grouped under :Relation), or a single
#            pattern String.
# slack    - optional Proc deciding which tokens may be skipped while
#            matching; defaults to skipping any token whose type is not 'O'.
# type     - optional type tag forwarded to the token trie.
#
# Raises ArgumentError when +patterns+ is none of the supported classes.
def initialize(patterns, slack = nil, type = nil)
  patterns = case
             when (Hash === patterns or TSV === patterns)
               patterns
             when Array === patterns
               {:Relation => patterns}
             when String === patterns
               {:Relation => [patterns]}
             else
               # Fail fast: without this, an unsupported value became nil and
               # surfaced later as a confusing NoMethodError on nil.each.
               raise ArgumentError, "Unsupported patterns class: #{patterns.class}"
             end

  @type = type

  # Tokenize each pattern; NP[...] chunk placeholders stay single tokens.
  tokenized_patterns = {}
  patterns.each do |key, values|
    tokenized_patterns[key] = values.collect do |v|
      Token.tokenize(v, /(NP\[[^\]]+\])|\s+/)
    end
  end

  PatternRelExt.prepare_chunk_patterns(new_token_trie, tokenized_patterns, type)

  # Default slack: tokens tagged 'O' may not be skipped during matching.
  token_trie.slack = slack || Proc.new{|t| t.type != 'O'}
end
Instance Attribute Details
#token_trie ⇒ Object
Returns the value of attribute token_trie.
76 77 78 |
# File 'lib/rbbt/ner/patterns.rb', line 76 def token_trie @token_trie end |
#type ⇒ Object
Returns the value of attribute type.
76 77 78 |
# File 'lib/rbbt/ner/patterns.rb', line 76 def type @type end |
Class Method Details
.prepare_chunk_patterns(token_trie, patterns, type = nil) ⇒ Object
72 73 74 |
# File 'lib/rbbt/ner/patterns.rb', line 72

# Processes the tokenized patterns into a trie index, rewrites its keys into
# chunk-matching form (see .transform_index), and merges the result into
# +token_trie+ under +type+.
def self.prepare_chunk_patterns(token_trie, patterns, type = nil)
  index = TokenTrieNER.process({}, patterns)
  token_trie.merge(transform_index(index), type)
end
.simple_pattern(sentence, patterns, type = nil) ⇒ Object
11 12 13 14 15 16 17 18 19 20 |
# File 'lib/rbbt/ner/patterns.rb', line 11

# Matches simple regular-expression patterns over +sentence+ while its
# entity segments are temporarily replaced by their upcased types.
#
# patterns - one pattern or an Array of patterns, each compiled to a Regexp.
# type     - label for the matches; defaults to "Simple Pattern".
def self.simple_pattern(sentence, patterns, type = nil)
  patterns = [patterns] unless Array === patterns
  type ||= "Simple Pattern"

  ner = RegExpNER.new type => patterns.collect{|p| /#{p}/}

  segments = sentence.segments
  segments = segments.values.flatten if Hash === segments

  Transformed.with_transform(sentence, segments, Proc.new{|s| s.type.to_s.upcase}) do |sentence|
    ner.entities(sentence)
  end
end
.transform_index(index) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/rbbt/ner/patterns.rb', line 52

# Recursively rewrites the keys of a trie index via .transform_key.
# Keys that become Procs cannot be looked up by hash equality, so they are
# collected under the reserved :PROCS key instead.
def self.transform_index(index)
  index.each_with_object({}) do |(key, next_index), result|
    unless Hash === next_index
      # Leaf entry: rewrite the key, keep the payload untouched.
      result[transform_key(key)] = next_index
      next
    end

    transformed = transform_key(key)
    if Proc === transformed
      (result[:PROCS] ||= {})[transformed] = transform_index(next_index)
    else
      result[transformed] = transform_index(next_index)
    end
  end
end
.transform_key(key) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/rbbt/ner/patterns.rb', line 23 def self.transform_key(key) case when key =~ /(.*)\[entity:(.*)\]/ chunk_type, chunk_value = $1, $2 annotation_types = chunk_value.split(",") Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and ((Hash === chunk.segments ? chunk.segments.values.flatten : chunk.segments).flatten.select{|a| NamedEntity === a}.collect{|a| a.type.to_s}.flatten & annotation_types).any? } when key =~ /(.*)\[code:(.*)\]/ chunk_type, chunk_value = $1, $2 annotation_codes = chunk_value.split(",") Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and ((Hash === chunk.segments ? chunk.segments.values.flatten : chunk.segments).select{|a| NamedEntity === a}.collect{|a| a.code}.flatten & annotation_codes).any? } when key =~ /(.*)\[stem:(.*)\]/ chunk_type, chunk_value = $1, $2 Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and chunk.split(/\s+/).select{|w| w.stem == chunk_value.stem}.any?} when key =~ /(.*)\[(.*)\]/ chunk_type, chunk_value = $1, $2 Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and chunk.parts.values.select{|a| a == chunk_value}.any?} else key end end |
Instance Method Details
#match_chunks(chunks) ⇒ Object
115 116 117 118 119 |
# File 'lib/rbbt/ner/patterns.rb', line 115

# Runs the token trie over a list of chunks and tags every match with the
# Relationship module. Returns the matches.
def match_chunks(chunks)
  matches = token_trie.match(chunks)
  matches.each{|match| match.extend Relationship }
end
#match_sentences(sentences) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/rbbt/ner/patterns.rb', line 121

# Chunks each sentence with GDep, attaches the sentence's annotations to the
# chunks they fall inside, and matches the pattern trie over every chunk
# list. Returns one match list per sentence.
def match_sentences(sentences)
  all_chunks = NLP.gdep_chunk_sentences(sentences)

  sentences.zip(all_chunks).map do |sentence, chunks|
    index = Segment.index(sentence.segments)
    chunks.each{|chunk| Segmented.setup(chunk, index[chunk.range]) }
    match_chunks(chunks)
  end
end
#new_token_trie ⇒ Object
77 78 79 |
# File 'lib/rbbt/ner/patterns.rb', line 77 def new_token_trie @token_trie = TokenTrieNER.new({}) end |
#slack(slack) ⇒ Object
86 87 88 |
# File 'lib/rbbt/ner/patterns.rb', line 86

# Replaces the slack criterion of the underlying token trie (a Proc deciding
# which tokens may be skipped during matching).
def slack(criterion)
  @token_trie.slack = criterion
end