Module: PubMed

Defined in:
lib/rbbt/sources/pubmed.rb

Overview

This module offers an interface to PubMed for performing queries and retrieving simple information from articles. It uses the caching services of Rbbt.

Defined Under Namespace

Classes: Article

Constant Summary collapse

@@pubmed_lag =
1

Class Method Summary collapse

Class Method Details

.get_article(pmids) ⇒ Object

return Article.new(xml)

  end
end

end



249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/rbbt/sources/pubmed.rb', line 249

# Retrieves PubMed records for one or several PMIDs and wraps each one in an
# Article.
#
# pmids - a single id or an Array of ids; nil entries are discarded.
#
# Returns a Hash mapping id => Article when an Array was given (ids without a
# non-empty record are left out). For a single id, returns its Article, or nil
# when no record was obtained.
#
# The XML of each record goes through FileCache.cache_online_elements using
# the name pattern 'pubmed-{ID}.xml'; the block below is responsible for
# producing the XML for the ids it receives (presumably only the ones that are
# not cached yet -- confirm against FileCache).
def self.get_article(pmids)
  wants_hash = Array === pmids

  id_list = wants_hash ? pmids : [pmids]
  id_list = id_list.compact

  chunk_size = 50
  result_files = FileCache.cache_online_elements(id_list, 'pubmed-{ID}.xml') do |ids|
    records = []
    batches = Misc.divide(ids, (ids.length / chunk_size) + 1)

    Log::ProgressBar.with_bar(batches.length, :desc => "Downloading articles from PubMed") do |bar|
      bar.init
      batches.each do |batch|
        begin
          Misc.try3times do
            url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            postdata = "db=pubmed&retmode=xml&id=#{batch * ","}"

            # The parameters travel in the query string. The temporary file
            # holding them is still created, but it is only handed over under
            # the "--__post-file=" key (the "--post-file=" variant was
            # commented out upstream); presumably Open.read ignores it.
            xml = TmpFile.with_file(postdata) do |postfile|
              Open.read("#{url}?#{postdata}", :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
            end

            records.concat(xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten)
          end
        rescue Aborted => aborted
          # An abort must propagate instead of being logged and skipped.
          raise aborted
        rescue Exception => error
          # Any other failure is logged and the batch is skipped.
          Log.exception error
        ensure
          # The bar advances once per batch, whether or not it succeeded.
          bar.tick
        end
      end
    end

    # Index every downloaded record by the first PMID found in it.
    result = {}
    records.each do |record|
      result[record[/<PMID[^>]*?>(.*?)<\/PMID>/, 1]] = record
    end

    # First pass: an id given with leading zeros reuses the record stored
    # under its zero-stripped form.
    ids.each do |id|
      next if id.nil? || result[id]
      stripped = String === id ? id.sub(/^0+/,'') : id
      match = result[stripped]
      result[id] = match if match
    end

    # Second pass: ids that are still unresolved get an empty string.
    ids.each do |id|
      next if id.nil? || result[id]
      result[id] = ""
    end

    result
  end

  articles = id_list.each_with_object({}) do |id, found|
    next if id.nil?
    path = result_files[id]
    next if path.nil?

    txt = Open.read(path)
    # An empty file means no record was obtained for this id.
    found[id] = Article.new(txt) unless txt.empty?
  end

  wants_hash ? articles : articles.values.first
end

.query(query, retmax = nil) ⇒ Object

Performs the specified query and returns an array with the matching PubMed ids. retmax can be used to limit the number of ids returned; if it is not specified, 30000 is used.



15
16
17
18
19
# File 'lib/rbbt/sources/pubmed.rb', line 15

# Runs an esearch request against PubMed and returns the ids found.
#
# query  - the search term, inserted into the request URL as given.
# retmax - upper bound on the number of ids requested; 30000 when nil.
#
# Returns an Array of Strings: the digits captured from every <Id> element of
# the response.
def self.query(query, retmax=nil)
  limit = retmax || 30000
  search_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?retmax=#{limit}&db=pubmed&term=#{query}"

  response = Open.read(search_url, :quiet => true, :nocache => true)
  response.scan(/<Id>(\d+)<\/Id>/).flatten
end