Class: PatternRelExt
- Inherits: Object
- Hierarchy: Object → PatternRelExt
- Defined in: lib/rbbt/ner/patterns.rb
Instance Attribute Summary collapse
-
#token_trie ⇒ Object
Returns the value of attribute token_trie.
-
#type ⇒ Object
Returns the value of attribute type.
Class Method Summary collapse
- .prepare_chunk_patterns(token_trie, patterns, type = nil) ⇒ Object
- .simple_pattern(sentence, patterns, type = nil) ⇒ Object
- .transform_index(index) ⇒ Object
- .transform_key(key) ⇒ Object
Instance Method Summary collapse
-
#initialize(patterns, slack = nil, type = nil) ⇒ PatternRelExt
constructor
A new instance of PatternRelExt.
- #match_chunks(chunks) ⇒ Object
- #match_sentences(sentences) ⇒ Object
- #new_token_trie ⇒ Object
- #slack(slack) ⇒ Object
Constructor Details
#initialize(patterns, slack = nil, type = nil) ⇒ PatternRelExt
Returns a new instance of PatternRelExt.
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/rbbt/ner/patterns.rb', line 91

# Builds a relation extractor from a set of textual patterns.
#
# patterns - a Hash/TSV mapping relation names to lists of patterns, an
#            Array of patterns (grouped under :Relation), or a single
#            pattern String.
# slack    - optional Proc deciding which tokens may be skipped while
#            matching; defaults to skipping any token whose type is not 'O'.
# type     - optional type tag forwarded to the token trie.
#
# Raises ArgumentError when +patterns+ is none of the supported classes.
def initialize(patterns, slack = nil, type = nil)
  patterns = case
             when (Hash === patterns or TSV === patterns)
               patterns
             when Array === patterns
               {:Relation => patterns}
             when String === patterns
               {:Relation => [patterns]}
             else
               # Fail fast: without this, an unsupported value became nil and
               # surfaced later as a confusing NoMethodError on nil.each.
               raise ArgumentError, "Unsupported patterns class: #{patterns.class}"
             end

  @type = type

  # Tokenize each pattern; NP[...] chunk placeholders stay single tokens.
  tokenized_patterns = {}
  patterns.each do |key, values|
    tokenized_patterns[key] = values.collect do |v|
      Token.tokenize(v, /(NP\[[^\]]+\])|\s+/)
    end
  end

  PatternRelExt.prepare_chunk_patterns(new_token_trie, tokenized_patterns, type)

  # Default slack: tokens tagged 'O' may not be skipped during matching.
  token_trie.slack = slack || Proc.new{|t| t.type != 'O'}
end
Instance Attribute Details
#token_trie ⇒ Object
Returns the value of attribute token_trie.
76 77 78 |
# File 'lib/rbbt/ner/patterns.rb', line 76 def token_trie @token_trie end |
#type ⇒ Object
Returns the value of attribute type.
76 77 78 |
# File 'lib/rbbt/ner/patterns.rb', line 76 def type @type end |
Class Method Details
.prepare_chunk_patterns(token_trie, patterns, type = nil) ⇒ Object
72 73 74 |
# File 'lib/rbbt/ner/patterns.rb', line 72

# Processes the tokenized patterns into a trie index, rewrites its keys into
# chunk-matching form (see .transform_index), and merges the result into
# +token_trie+ under +type+.
def self.prepare_chunk_patterns(token_trie, patterns, type = nil)
  index = TokenTrieNER.process({}, patterns)
  token_trie.merge(transform_index(index), type)
end
.simple_pattern(sentence, patterns, type = nil) ⇒ Object
11 12 13 14 15 16 17 18 19 20 |
# File 'lib/rbbt/ner/patterns.rb', line 11

# Matches simple regular-expression patterns over +sentence+ while its
# entity segments are temporarily replaced by their upcased types.
#
# patterns - one pattern or an Array of patterns, each compiled to a Regexp.
# type     - label for the matches; defaults to "Simple Pattern".
def self.simple_pattern(sentence, patterns, type = nil)
  patterns = [patterns] unless Array === patterns
  type ||= "Simple Pattern"

  ner = RegExpNER.new type => patterns.collect{|p| /#{p}/}

  segments = sentence.segments
  segments = segments.values.flatten if Hash === segments

  Transformed.with_transform(sentence, segments, Proc.new{|s| s.type.to_s.upcase}) do |sentence|
    ner.entities(sentence)
  end
end
.transform_index(index) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/rbbt/ner/patterns.rb', line 52

# Recursively rewrites the keys of a trie index via .transform_key.
# Keys that become Procs cannot be looked up by hash equality, so they are
# collected under the reserved :PROCS key instead.
def self.transform_index(index)
  index.each_with_object({}) do |(key, next_index), result|
    unless Hash === next_index
      # Leaf entry: rewrite the key, keep the payload untouched.
      result[transform_key(key)] = next_index
      next
    end

    transformed = transform_key(key)
    if Proc === transformed
      (result[:PROCS] ||= {})[transformed] = transform_index(next_index)
    else
      result[transformed] = transform_index(next_index)
    end
  end
end
.transform_key(key) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/rbbt/ner/patterns.rb', line 23 def self.transform_key(key) case when key =~ /(.*)\[entity:(.*)\]/ chunk_type, chunk_value = $1, $2 annotation_types = chunk_value.split(",") Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and ((Hash === chunk.segments ? chunk.segments.values.flatten : chunk.segments).flatten.select{|a| NamedEntity === a}.collect{|a| a.type.to_s}.flatten & annotation_types).any? } when key =~ /(.*)\[code:(.*)\]/ chunk_type, chunk_value = $1, $2 annotation_codes = chunk_value.split(",") Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and ((Hash === chunk.segments ? chunk.segments.values.flatten : chunk.segments).select{|a| NamedEntity === a}.collect{|a| a.code}.flatten & annotation_codes).any? } when key =~ /(.*)\[stem:(.*)\]/ chunk_type, chunk_value = $1, $2 Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and chunk.split(/\s+/).select{|w| w.stem == chunk_value.stem}.any?} when key =~ /(.*)\[(.*)\]/ chunk_type, chunk_value = $1, $2 Proc.new{|chunk| (chunk_type == "all" or (Array === chunk.type ? chunk.type.include?(chunk_type) : chunk.type == chunk_type)) and chunk.parts.values.select{|a| a == chunk_value}.any?} else key end end |
Instance Method Details
#match_chunks(chunks) ⇒ Object
115 116 117 118 119 |
# File 'lib/rbbt/ner/patterns.rb', line 115

# Runs the token trie over a list of chunks and tags every match with the
# Relationship module. Returns the matches.
def match_chunks(chunks)
  matches = token_trie.match(chunks)
  matches.each{|match| match.extend Relationship }
end
#match_sentences(sentences) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/rbbt/ner/patterns.rb', line 121

# Chunks each sentence with GDep, attaches the sentence's annotations to the
# chunks they fall inside, and matches the pattern trie over every chunk
# list. Returns one match list per sentence.
def match_sentences(sentences)
  all_chunks = NLP.gdep_chunk_sentences(sentences)

  sentences.zip(all_chunks).map do |sentence, chunks|
    index = Segment.index(sentence.segments)
    chunks.each{|chunk| Segmented.setup(chunk, index[chunk.range]) }
    match_chunks(chunks)
  end
end
#new_token_trie ⇒ Object
77 78 79 |
# File 'lib/rbbt/ner/patterns.rb', line 77 def new_token_trie @token_trie = TokenTrieNER.new({}) end |
#slack(slack) ⇒ Object
86 87 88 |
# File 'lib/rbbt/ner/patterns.rb', line 86

# Replaces the slack criterion of the underlying token trie (a Proc deciding
# which tokens may be skipped during matching).
def slack(criterion)
  @token_trie.slack = criterion
end