10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# File 'lib/rbbt/ner/pubtator.rb', line 10
# Retrieves PubTator annotations for a list of PMIDs and returns them grouped
# by PMID.
#
# The PMIDs are posted to PUBTATOR_URL as JSON in chunks of 1000, leaving at
# least 3 seconds between the start of consecutive requests. Each non-empty
# response line is read as either:
#
# * a text line, <tt>pmid|type|content</tt> (it starts with digits followed
#   by a pipe); its content is collected per PMID, or
# * an annotation line with tab-separated fields: pmid, start, end, literal,
#   type and code; it becomes a NamedEntity whose offset is the start field.
#
# When +alignments+ is given, the collected text lines of each PMID are joined
# with a single space and every entity is relocated from that text onto
# <tt>alignments[pmid]</tt> through Segment.relocate. Entities that cannot be
# relocated are logged with Log.low and left out of the result.
#
# @param pmids [Array] PMIDs to query
# @param concepts [Array<String>] concept types sent in the request payload
# @param alignments [Hash, nil] target text per PMID, keyed by the PMID string
#   as it appears in the response; nil skips the relocation step
# @return [Hash{String => Array}] entities per PMID (only PMIDs that have at
#   least one annotation line appear)
# @raise [RuntimeError] if +alignments+ is given but has no entry for a PMID
#   that has entities
def self.pubtator_entities(pmids, concepts = ['gene'], alignments = nil)
  texts = {}
  entities = {}
  last_request = nil

  Misc.chunk(pmids, 1000) do |chunk|
    if last_request
      elapsed = Time.now - last_request
      sleep(3 - elapsed) if elapsed < 3
    end
    # Take the timestamp after any sleep so the 3 second spacing is measured
    # between the moments requests are actually sent.
    last_request = Time.now

    payload = { pmids: chunk, concepts: concepts }.to_json
    response = RestClient.post(PUBTATOR_URL, payload, { content_type: 'json', accept: 'json' }).body

    response.split("\n").each do |line|
      next if line.empty?

      if line =~ /^\d+\|/
        # Limit the split to 3 fields: the content itself may contain "|".
        pmid, _text_type, content = line.split("|", 3)
        (texts[pmid] ||= []) << content
      else
        pmid, start, _eend, literal, type, code = line.split("\t")
        ne = NamedEntity.setup(literal, code: code, type: type, offset: start.to_i)
        (entities[pmid] ||= []) << ne
      end
    end
  end

  return entities unless alignments

  # Loop-invariant: the same character table and transform serve every PMID.
  greek_characters = Misc.greek_characters
  greek_transform = lambda { |k| greek_characters[k] }

  new_entities = {}
  entities.each do |pmid, list|
    text = texts[pmid] * " "
    alignment = alignments[pmid]
    raise "Alignment for #{pmid} not found" if alignment.nil?

    new_list = Transformed.with_transform(alignment, greek_characters.keys, greek_transform) do
      list.map do |entity|
        begin
          # NOTE(review): 10 is passed straight to Segment.relocate; presumably
          # a search margin/padding -- confirm against Segment.
          Segment.relocate(entity, text, alignment, 10)
          entity
        rescue StandardError
          # Best effort: report and drop the entity. StandardError (rather
          # than Exception) lets interrupts and exit requests propagate.
          Log.low "Entity #{entity} (#{entity.range}) not found in alignment text for #{pmid}"
          nil
        end
      end
    end

    new_entities[pmid] = new_list.compact
  end

  new_entities
end
|