Class: RegExpNER
- Includes:
- SimpleDSL
- Defined in:
- lib/rbbt/ner/regexpNER.rb
Instance Attribute Summary
-
#regexps ⇒ Object
Returns the value of attribute regexps.
-
#split_on_matches ⇒ Object
Returns the value of attribute split_on_matches.
Class Method Summary
- .match_regexp(text, regexp, type = nil) ⇒ Object
- .match_regexp_hash(text, regexp_hash, split_on_matches = false) ⇒ Object
- .match_regexp_list(text, regexp_list, type = nil, split_on_matches = false) ⇒ Object
Instance Method Summary
- #__define_regexp_hook(name, regexp, *args) ⇒ Object
- #add_regexp(list = {}) ⇒ Object
- #define_regexp(*args, &block) ⇒ Object
-
#initialize(regexps = {}) ⇒ RegExpNER
constructor
A new instance of RegExpNER.
- #match(text) ⇒ Object
- #token_score(*args) ⇒ Object
Methods inherited from NER
Constructor Details
#initialize(regexps = {}) ⇒ RegExpNER
Returns a new instance of RegExpNER.
76 77 78 |
# File 'lib/rbbt/ner/regexpNER.rb', line 76
#
# Builds a recognizer from +regexps+.
#
# The argument is enumerated once and its entries are stored in a fresh
# Array in @regexps, so a Hash ends up as a list of [key, value] pairs and
# an Array is shallow-copied. @split_on_matches is not assigned here.
#
# regexps - enumerable of entries to recognize (default: empty Hash).
def initialize(regexps = {})
  @regexps = regexps.map { |entry| entry }
end
Instance Attribute Details
#regexps ⇒ Object
Returns the value of attribute regexps.
75 76 77 |
# File 'lib/rbbt/ner/regexpNER.rb', line 75 def regexps @regexps end |
#split_on_matches ⇒ Object
Returns the value of attribute split_on_matches.
75 76 77 |
# File 'lib/rbbt/ner/regexpNER.rb', line 75 def split_on_matches @split_on_matches end |
Class Method Details
.match_regexp(text, regexp, type = nil) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/rbbt/ner/regexpNER.rb', line 7
#
# Collects every successive match of +regexp+ in +text+.
#
# The text is consumed left to right: after each match the scanned prefix is
# sliced off and the search restarts on the remainder, while +start+ counts
# the characters dropped so far so that offsets stay relative to the
# original +text+.
#
# What is reported as "the match" depends on the regexp:
# * it has named groups      -> the whole match, with :code set to the
#   "name=value" pairs joined by ";"
# * capture group 1 matched  -> the text of group 1 only; scanning resumes
#   right after that group, not after the whole match
# * otherwise                -> the whole match (this now also covers the
#   case where group 1 did not participate but a later group did, which
#   used to raise because group 1 has no offset)
#
# A match that ends at position 0 of the remaining text consumes nothing.
# It is not reported; the scan steps over one character instead, and the
# loop stops once the text is exhausted. Without this, any regexp able to
# match the empty string never terminated.
#
# text   - String to scan.
# regexp - pattern handed to String#match.
# type   - value stored as :entity_type on every match (default: nil).
#
# Returns an Array of the matched Strings, each passed through
# NamedEntity.setup with :offset, :entity_type and, for named groups, :code.
def self.match_regexp(text, regexp, type = nil)
  matches = []
  start = 0 # characters of the original text already sliced off

  while !text.empty? && (matchdata = text.match(regexp))
    named = matchdata.named_captures.any?
    # Group 0 is the whole match; group 1 is used only when it took part.
    group = !named && matchdata.captures.first ? 1 : 0
    match = matchdata[group]
    offset, eend = matchdata.offset(group)

    if eend.zero?
      # Nothing consumed: step over one character to guarantee progress.
      text = text[1..-1]
      start += 1
      next
    end

    if named
      code = matchdata.named_captures.collect { |k, v| [k, v] * "=" } * ";"
      NamedEntity.setup(match, :offset => start + offset, :entity_type => type, :code => code)
    else
      NamedEntity.setup(match, :offset => start + offset, :entity_type => type)
    end

    matches << match
    text = text[eend..-1]
    start += eend
  end

  matches
end
.match_regexp_hash(text, regexp_hash, split_on_matches = false) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/rbbt/ner/regexpNER.rb', line 57
#
# Applies every entry of +regexp_hash+ to +text+ and gathers the matches.
#
# +regexp_hash+ is iterated as (type, regexp_list) pairs; a value that is
# not an Array is wrapped in a one-element Array. For each pair the text is
# cut into chunks with Segment.split, which is given the matches collected
# so far when +split_on_matches+ is truthy and an empty list otherwise.
# Each chunk is scanned with match_regexp_list and every resulting match has
# the chunk's offset added to its own before it is collected.
#
# Returns the Array of all matches, in the order they were found.
def self.match_regexp_hash(text, regexp_hash, split_on_matches = false)
  found = []

  regexp_hash.each do |type, regexp_list|
    patterns = Array === regexp_list ? regexp_list : [regexp_list]
    already_matched = split_on_matches ? found : []

    Segment.split(text, already_matched).each do |chunk|
      base = chunk.offset
      match_regexp_list(chunk, patterns, type, split_on_matches).each do |match|
        match.offset = match.offset + base
        found << match
      end
    end
  end

  found
end
.match_regexp_list(text, regexp_list, type = nil, split_on_matches = false) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/rbbt/ner/regexpNER.rb', line 42
#
# Applies each regexp of +regexp_list+ to +text+ and gathers the matches.
#
# For every regexp the text is cut into chunks with Segment.split(text, []),
# each chunk is scanned with match_regexp, and every match has the chunk's
# offset added to its own before it is collected.
#
# NOTE(review): +split_on_matches+ is accepted but has no effect here. The
# previous code computed a split from the matches found so far when the flag
# was set and then immediately overwrote it with the plain split below, so
# the flag never influenced the result. That discarded call is removed; the
# effective behaviour is unchanged. Whether the flag should be honoured at
# this level needs confirming against Segment.split before it is enabled.
#
# text             - String (or chunk) to scan.
# regexp_list      - enumerable of patterns, tried in order.
# type             - forwarded to match_regexp (default: nil).
# split_on_matches - currently ignored, see the note above.
#
# Returns the Array of all matches, in the order they were found.
def self.match_regexp_list(text, regexp_list, type = nil, split_on_matches = false)
  matches = []

  regexp_list.each do |regexp|
    Segment.split(text, []).each do |chunk|
      match_regexp(chunk, regexp, type).each do |match|
        match.offset += chunk.offset
        matches << match
      end
    end
  end

  matches
end
Instance Method Details
#__define_regexp_hook(name, regexp, *args) ⇒ Object
84 85 86 |
# File 'lib/rbbt/ner/regexpNER.rb', line 84
#
# Hook named by #define_regexp when it calls load_config: appends the
# [name, regexp] pair to @regexps. Any further arguments are accepted and
# ignored.
#
# Returns @regexps.
def __define_regexp_hook(name, regexp, *args)
  @regexps.push([name, regexp])
end
#add_regexp(list = {}) ⇒ Object
92 93 94 |
# File 'lib/rbbt/ner/regexpNER.rb', line 92
#
# Appends the entries of +list+ to @regexps.
#
# A Hash contributes its [key, value] pairs and an Array contributes its
# elements. The list is converted with #to_a because a block-less #collect
# returns an Enumerator, which Array#concat rejects with a TypeError.
#
# list - Hash or Array of entries to add (default: empty Hash).
#
# Returns @regexps.
def add_regexp(list = {})
  @regexps.concat(list.to_a)
end
#define_regexp(*args, &block) ⇒ Object
88 89 90 |
# File 'lib/rbbt/ner/regexpNER.rb', line 88
#
# Forwards all arguments and the block to load_config, naming
# "__define_regexp_hook" as the hook.
#
# Returns whatever load_config returns.
def define_regexp(*args, &block)
  hook = "__define_regexp_hook"
  load_config(hook, *args, &block)
end
#match(text) ⇒ Object
96 97 98 99 100 101 |
# File 'lib/rbbt/ner/regexpNER.rb', line 96
#
# Finds the entities of +text+ using the regexps stored in @regexps.
#
# The matches come from RegExpNER.match_regexp_hash, called with
# @split_on_matches. Each one is passed through NamedEntity.setup again with
# its own offset and entity type; its code is kept when set and otherwise
# defaults to the matched string itself.
#
# The type is read and written as entity_type, the key RegExpNER.match_regexp
# stores it under; this method previously used :type, which did not agree
# with that.
# NOTE(review): assumes NamedEntity exposes an entity_type reader; confirm.
#
# Returns an Array holding the result of NamedEntity.setup for each match.
def match(text)
  matches = RegExpNER.match_regexp_hash(text, @regexps, @split_on_matches)
  matches.collect do |m|
    NamedEntity.setup(m, :offset => m.offset, :entity_type => m.entity_type, :code => m.code || m)
  end
end
#token_score(*args) ⇒ Object
80 81 82 |
# File 'lib/rbbt/ner/regexpNER.rb', line 80 def token_score(*args) 1 end |