Module: GNormPlus

Defined in:
lib/rbbt/ner/g_norm_plus.rb

Constant Summary collapse

# Text of the GNormPlus setup file. `process` writes this string to a file
# named 'config' in its working directory and passes that file name as the
# last argument to GNormPlus.jar. The focus species is set to 9606, which the
# header comment inside the text lists as Human; paths are relative
# ('./Dictionary'), so they resolve against the directory GNormPlus runs in.
CONFIG =
<<-EOF
#===Annotation
#Attribution setting:
#FocusSpecies = Taxonomy ID
#       All: All species
#       9606: Human
#       4932: yeast
#       7227: Fly
#       10090: Mouse
#       10116: Rat
#       7955: Zebrafish
#       3702: Arabidopsis thaliana
#open: True
#close: False

[Focus Species]
	FocusSpecies = 9606
	FilterAntibody = False
[Dictionary & Model]
	DictionaryFolder = ./Dictionary
	GNRModel = ./Dictionary/GNR.Model
	SCModel = ./Dictionary/SimConcept.Model
	GeneIDMatch = True
	HomologeneID = False
	Normalization2Protein = False
	ShowUnNormalizedMention = False
	IgnoreNER = False
	DeleteTmp = True
EOF

Class Method Summary collapse

Class Method Details

.entities(texts) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/ner/g_norm_plus.rb', line 91

# Runs `process` on the given texts and turns every encoded annotation into a
# NamedEntity.
#
# Each annotation string is expected to hold colon-separated fields in the
# order start, end, literal, type, code. Inside the literal, '·' stands in for
# a real ':' and is converted back before the entity is set up. The end
# position is parsed but not stored.
#
# @param texts the collection handed unchanged to `process`
# @return [Hash] document name => Array of values returned by NamedEntity.setup
def self.entities(texts)
  by_document = {}

  process(texts).each do |doc_name, encoded_mentions|
    by_document[doc_name] = encoded_mentions.map do |encoded|
      first_char, _last_char, mention, kind, identifier = encoded.split(":")

      # Undo the ':' -> '·' escaping so the mention reads as in the text.
      mention.gsub!('·',':')

      NamedEntity.setup(
        mention,
        :offset => first_char.to_i,
        :entity_type => kind,
        :code => identifier
      )
    end
  end

  by_document
end

.process(texts) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rbbt/ner/g_norm_plus.rb', line 44

# Runs GNormPlus over a collection of texts and returns its raw annotations.
#
# Everything happens inside a temporary directory: the GNormPlus resources
# (Dictionary, BioC.dtd, Ab3P, CRF) are linked in, one input file is written
# per text, CONFIG is written to 'config', GNormPlus.jar is executed, and the
# files it leaves in 'output' are parsed.
#
# @param texts name => text pairs (anything answering `each` and `size`); when
#   it also answers `key_field`, that value becomes the key field of the result
# @return a flat TSV with the single field "Entities"; each key is an output
#   file name without its ".txt" suffix and each value is an Array with one
#   String per annotation line, made of the tab-separated columns after the
#   first one joined with ':' (a ':' occurring in the line itself is replaced
#   by '·' beforehand so it cannot be confused with the separator)
# @raise [RuntimeError] when texts were given but no output file was produced
def self.process(texts)
  TmpFile.with_file do |tmpdir|
    Open.mkdir tmpdir

    Misc.in_dir tmpdir do
      Open.ln_s Rbbt.software.opt.GNormPlus.Dictionary.find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["BioC.dtd"].find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["Ab3P"].find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["CRF"].find, '.'
      Open.mkdir 'input'
      Open.mkdir 'output'
      Open.mkdir 'tmp'

      texts.each do |name,text|
        text = Misc.fixutf8(text)

        # The input line is "<name>|a|<text>" on a single line, so '|', newlines
        # and tabs in the text are swapped for other single characters; the
        # replacements are one-for-one, which keeps the text length unchanged.
        text = text.gsub('|', '#').gsub("\n", " ").gsub(/\t/,' ')

        Open.write("input/#{name}.txt") do |f|
          f.puts "#{name}|a|" << text
          f.puts
        end
      end

      Open.write('config', CONFIG)
      mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "8G")
      CMD.cmd_log("java -Xmx#{mem} -Xms#{mem}  -jar '#{Rbbt.software.opt.GNormPlus.produce.find}/GNormPlus.jar' 'input' 'output' 'config'")

      key_field = texts.respond_to?(:key_field) ? texts.key_field : "ID"
      tsv = TSV.setup({}, :key_field => key_field, :fields => ["Entities"], :type => :flat)

      Dir.glob("output/*.txt").each do |file|
        # Strip only the trailing extension; a plain sub(".txt", '') would cut
        # the first ".txt" found anywhere in the name.
        name = File.basename(file, ".txt")

        # The first line echoes the input text. drop(1) yields [] for an empty
        # file (where [1..-1] would be nil), and empty lines are skipped because
        # they have no columns to join.
        annotation_lines = Open.read(file).split("\n").drop(1).reject(&:empty?)

        tsv[name] = annotation_lines.collect { |l| l.gsub(':', '·').split("\t")[1..-1] * ":" }
      end

      raise "GNormPlus failed: no results found" if tsv.size == 0 && texts.size > 0

      tsv
    end
  end
end