Class: NERFeatures
Instance Attribute Summary collapse
- #reverse ⇒ Object
  Returns the value of attribute reverse.
Class Method Summary collapse
- .reverse(text) ⇒ Object
- .tokens(text) ⇒ Object
Instance Method Summary collapse
- #config ⇒ Object
- #context(name, &block) ⇒ Object
- #define(name, *args, &block) ⇒ Object
- #direction(dir) ⇒ Object
- #features(word) ⇒ Object
- #initialize(file = nil, reverse = false, &block) ⇒ NERFeatures (constructor)
  A new instance of NERFeatures.
- #tagged_features(text, mentions) ⇒ Object
- #template(window = nil) ⇒ Object
- #text_features(text, positive = nil) ⇒ Object
- #train(features, model) ⇒ Object
- #window(positions) ⇒ Object
Constructor Details
#initialize(file = nil, reverse = false, &block) ⇒ NERFeatures
Returns a new instance of NERFeatures.
34 35 36 37 38 39 40 41 42 43 |
# File 'lib/rbbt/ner/rner.rb', line 34
# Builds a feature extractor and loads its feature definitions.
#
# The instance starts with no feature types, no feature order and no
# context features, records the requested direction, and then hands the
# configuration to +parse+ under the :define key.
#
# @param file [Object, nil] configuration source forwarded to +parse+; when
#   neither +file+ nor a block is given it is replaced by
#   Rbbt.share.ner['config.rb'].find
# @param reverse [Boolean] initial value of the +reverse+ attribute
# @param block [Proc] optional configuration block forwarded to +parse+
def initialize(file = nil, reverse = false, &block)
  @reverse = reverse
  @context = []
  @order   = []
  @types   = {}

  # Fall back to the shared configuration only when the caller gave nothing.
  unless file || block
    file = Rbbt.share.ner['config.rb'].find
  end

  parse(:define, file, &block)
end
Instance Attribute Details
#reverse ⇒ Object
Returns the value of attribute reverse.
33 34 35 |
# File 'lib/rbbt/ner/rner.rb', line 33
# Reader for the +reverse+ attribute.
#
# @return [Object] the current value of @reverse
def reverse
  @reverse
end
Class Method Details
.reverse(text) ⇒ Object
19 20 21 |
# File 'lib/rbbt/ner/rner.rb', line 19
# Tokenizes +text+ and returns the tokens in reverse order, joined by
# single spaces.
#
# @param text [String] text to tokenize with +tokens+
# @return [String] the reversed tokens separated by " "
def self.reverse(text)
  words = tokens(text)
  words.reverse.join(" ")
end
.tokens(text) ⇒ Object
9 10 11 12 13 14 15 16 17 |
# File 'lib/rbbt/ner/rner.rb', line 9
# Splits +text+ into tokens by scanning it with a single extended regexp.
#
# The alternatives are tried in this order at each position:
# 1. optional word characters, an optional hyphen, a number (digits, or
#    digits with a "." or "," followed by more digits), then optional word
#    characters;
# 2. one word character, a hyphen, then optional word characters;
# 3. word characters, a hyphen and an uppercase letter that is not followed
#    by another word character;
# 4. a run of word characters;
# 5. one of the punctuation characters . , ( ) / [ ] { } ' " + -
# Anything else (for example whitespace) is skipped.
#
# @param text [String] text to tokenize
# @return [Array<String>] the tokens in order of appearance
def self.tokens(text)
  token_pattern = /
    \w*-?(?:\d*\d[.,]\d\d*|\d+)\w*|
    \w-\w*|
    \w+-[A-Z](?!\w)|
    \w+|
    [.,()\/\[\]{}'"+-]
  /x

  text.scan(token_pattern)
end
Instance Method Details
#config ⇒ Object
45 46 47 |
# File 'lib/rbbt/ner/rner.rb', line 45
# Returns the entry stored under :define in @config.
#
# NOTE(review): @config is not assigned anywhere in the visible code;
# presumably +parse+ fills it in -- confirm.
#
# @return [Object] the value of @config[:define]
def config
  @config[:define]
end
#context(name, &block) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/rbbt/ner/rner.rb', line 53
# Marks one or more features as context features, i.e. features for which
# +template+ also emits the window positions.
#
# Names are normalized with +to_s+, matching how +define+ stores them in
# @types and @order. Without this a feature named by a Symbol would never
# be found by the string comparison in +template+, and a block given with
# it would be stored under a key that +features+ never looks up.
#
# @param name [String, Symbol, Array] a feature name, or a list of names
# @param block [Proc] optional feature action; when given (single-name form
#   only) it replaces the action registered for +name+
# @return [Object] the updated context list for the Array form; otherwise
#   the stored block, or nil when no block was given
def context(name, &block)
  if name.is_a? Array
    @context += name.collect { |feature| feature.to_s }
  else
    key = name.to_s
    @context.push key

    # The block might be wrongly assigned to this function
    # instead of the actual definition, fix that.
    @types[key] = block if block
  end
end
#define(name, *args, &block) ⇒ Object
23 24 25 26 27 28 29 30 31 |
# File 'lib/rbbt/ner/rner.rb', line 23
# Registers a feature under +name+.
#
# The action is the first extra argument if present, otherwise the block,
# otherwise a case-insensitive regexp built from the name with an optional
# trailing "s". The feature is appended to the evaluation order.
#
# @param name [String, Symbol] feature name; stored as a String
# @param args [Array] optional; only the first element is used, as the action
# @param block [Proc] optional action used when no argument is given
# @return [String] the feature name as a String
# @raise [RuntimeError] "Wrong format" when the action is neither a Proc
#   nor a Regexp
def define(name, *args, &block)
  action = args.first
  action ||= block
  action ||= /#{name.to_s}s?/i

  case action
  when Proc, Regexp
    @types[name.to_s] = action
    @order.push name.to_s
    name.to_s
  else
    raise "Wrong format"
  end
end
#direction(dir) ⇒ Object
67 68 69 70 71 |
# File 'lib/rbbt/ner/rner.rb', line 67
# Switches the extractor to reverse mode when +dir+ is :reverse.
#
# Any other value leaves @reverse untouched.
#
# @param dir [#to_sym] direction name
# @return [true, nil] true when reverse mode was enabled, nil otherwise
def direction(dir)
  @reverse = true if dir.to_sym == :reverse
end
#features(word) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/rbbt/ner/rner.rb', line 73
# Computes the feature row for a single token.
#
# The row starts with the token itself, followed by one value per feature
# in definition order:
# * Proc action: the result of calling it with the token;
# * otherwise the action's +match+ against the token: false when there is
#   no match, the first capture group when it is present, true otherwise.
#
# @param word [String] the token
# @return [Array] the token followed by its feature values
def features(word)
  computed = @order.collect do |feature_name|
    action = @types[feature_name]

    if action.is_a?(Proc)
      action.call(word)
    else
      found = action.match(word)
      if found
        # Prefer the captured text; a match without a capture is just true.
        found[1] || true
      else
        false
      end
    end
  end

  [word] + computed
end
#tagged_features(text, mentions) ⇒ Object
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/rbbt/ner/rner.rb', line 131
# Computes labelled feature rows for +text+, marking the tokens that belong
# to any of the given mentions.
#
# The text is split on a regexp made of all mentions; the resulting chunks
# alternate between non-mention and mention text, and each chunk is passed
# to +text_features+ with the matching flag. In reverse mode chunks are
# prepended instead of appended.
#
# Whitespace inside a mention matches any run of whitespace in the text.
# Regexp.quote escapes a space as backslash + space, so every such escaped
# space is replaced by \s+. (The earlier code looked for a literal
# backslash + "s", which Regexp.quote does not produce for a space, so
# multi-word mentions only matched when separated by exactly one space.)
#
# @param text [String] text to process
# @param mentions [Array<String>, nil] mention strings; nil or empty means
#   nothing is tagged
# @return [Array] concatenated results of +text_features+ for all chunks
def tagged_features(text, mentions)
  mentions ||= []
  # A placeholder keeps the alternation non-empty when there are no mentions.
  mentions = ['IMPOSSIBLE_MATCH'] if mentions.empty?

  re = mentions.collect { |mention|
    quoted = Regexp.quote(mention.gsub(/\s+/, ' '))
    # Block form so the replacement text is taken literally.
    quoted.gsub('\\ ') { '\s+' }
  }.join("|")

  positive = false
  features = []
  # The capture group makes split keep the mentions as their own chunks.
  text.split(/(#{re})/).each { |chunk|
    chunk_features = text_features(chunk, positive)
    positive = !positive
    features = @reverse ? chunk_features + features : features + chunk_features
  }
  features
end
#template(window = nil) ⇒ Object
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/rbbt/ner/rner.rb', line 96
# Builds the template text (the text +train+ writes to a file for
# crf_learn).
#
# Every feature gets one "U<name>: %x[0,<column>]" line, where columns are
# numbered from 1 in definition order. Features listed in @context get one
# extra line per window position. The text always ends with a "B" line.
#
# @param window [Array<Integer>, nil] window positions; defaults to
#   @window, and to [1, -1] when that is unset as well
# @return [String] the template text
def template(window = nil)
  offsets = window || @window || [1, -1]
  lines = []

  @order.each_with_index do |feat, index|
    i = index + 1
    lines << "U#{ feat }: %x[0,#{ i }]\n"
    next unless @context.include?(feat)

    offsets.each do |p|
      lines << "U#{ feat }##{ p}: %x[#{ p },#{ i }]\n"
    end
  end

  lines << "B\n"
  lines.join
end
#text_features(text, positive = nil) ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/rbbt/ner/rner.rb', line 118
# Computes one feature row per token of +text+.
#
# In reverse mode the text is first passed through the class-level
# +reverse+. When +positive+ is not nil a label is appended to every row:
# 0 for a non-positive text, 1 for the first token of a positive text and
# 2 for its remaining tokens.
#
# @param text [String] text to tokenize
# @param positive [Boolean, nil] whether the text is a mention; nil means
#   no label is appended
# @return [Array<Array>] feature rows in token order
def text_features(text, positive = nil)
  source = @reverse ? self.class.reverse(text) : text
  first_token = true

  self.class.tokens(source).collect do |token|
    row = features(token)

    unless positive.nil?
      label = if !positive
                0
              elsif first_token
                1
              else
                2
              end
      row << label
      first_token = false
    end

    row
  end
end
#train(features, model) ⇒ Object
153 154 155 156 157 158 159 160 161 |
# File 'lib/rbbt/ner/rner.rb', line 153
# Trains a model by running the bundled crf_learn binary.
#
# Writes the current template to a temporary file, runs crf_learn with the
# template, the features file and the model path, and then stores the
# configuration next to the model as "<model>.config".
#
# crf_learn is started with the argument-list form of +system+, so no shell
# is involved and paths containing quotes, spaces or other shell
# metacharacters are passed through unchanged. The temporary template is
# removed even if one of the steps raises.
#
# NOTE(review): the exit status of crf_learn is ignored and the .config
# file is written regardless, as before -- confirm that is intended.
#
# @param features [#to_s] path of the features file given to crf_learn
# @param model [String] path of the model file to produce
# @return [Object] the result of removing the temporary template
def train(features, model)
  tmp_template = TmpFile.tmp_file("template-")
  removed = nil

  begin
    Open.write(tmp_template, template)

    crf_learn = File.join(Rbbt.datadir, 'third_party/crf++/bin/crf_learn')
    system(crf_learn, tmp_template.to_s, features.to_s, model.to_s)

    Open.write(model + '.config', config)
  ensure
    # rm_f so a template that was never written does not mask the real error.
    removed = FileUtils.rm_f(tmp_template)
  end

  removed
end
#window(positions) ⇒ Object
49 50 51 |
# File 'lib/rbbt/ner/rner.rb', line 49
# Stores the window positions that +template+ uses by default.
#
# @param positions [Array<Integer>] window positions
# @return [Array<Integer>] the positions just stored
def window(positions)
  @window = positions
end