Module: Whitepaper::Engine::IEEEXplore

Defined in:
lib/whitepaper/engine/ieeexplore.rb

Overview

This engine uses the IEEEXplore database to query metadata about a paper.

Constant Summary collapse

DOMAIN =

The domain for IEEEXplore.

"http://ieeexplore.ieee.org"
SEARCH_BY_TITLE_URL =

The url to use to search by title keywords.

"search/searchresult.jsp?reload=true&newsearch=true&queryText={title}&x=60&y=7"

Class Method Summary collapse

Class Method Details

.find_by_title(title) ⇒ Object

Returns a Whitepaper::Paper by searching for the paper with the given title keywords.



18
19
20
21
22
23
24
25
26
27
28
# File 'lib/whitepaper/engine/ieeexplore.rb', line 18

# Searches IEEEXplore for the given title keywords and returns the details of
# the first result.
#
# The search page is fetched with a fresh Mechanize agent; the first link
# matching //div[@class="detail"]/h3/a is taken as the paper, and its href is
# appended to DOMAIN to form the address handed to retrieve_details.
#
# @param title [String] the title keywords to search for
# @return [Object, nil] whatever retrieve_details returns for the first
#   result, or nil when the search page contains no matching result link
def find_by_title(title)
  @agent = Mechanize.new
  page = @agent.get find_by_title_url(title)

  # Only the first result link is considered.
  first_hit = page.search('//div[@class="detail"]/h3/a').first

  # No results: there is nothing to look up, so report that with nil instead
  # of failing on nil.attribute below.
  return nil if first_hit.nil?

  retrieve_details "#{DOMAIN}#{first_hit.attribute("href")}"
end

.find_by_title_url(title) ⇒ Object

Returns a URL that will query for the given title keywords.



13
14
15
# File 'lib/whitepaper/engine/ieeexplore.rb', line 13

# Returns a url that will query for the given title keywords.
#
# The title is substituted for the {title} placeholder of SEARCH_BY_TITLE_URL
# and the result is appended to DOMAIN. Every whitespace character in the
# title becomes "+"; all other characters that are not safe in a query value
# (such as "&", "#", "+", "%" or "\") are percent-encoded so they cannot
# change the structure of the query string.
#
# @param title [String] the title keywords to search for
# @return [String] the full search url
def find_by_title_url(title)
  require "uri" # no-op when the library is already loaded

  # Normalise every whitespace character to a plain space first so that each
  # one is encoded as "+" (tabs and newlines would otherwise become %09/%0A).
  query = URI.encode_www_form_component(title.gsub(/\s/, " "))

  # Block form: the replacement is inserted verbatim, with no back-reference
  # interpretation of the substituted text.
  "#{DOMAIN}/#{SEARCH_BY_TITLE_URL.gsub(/\{title\}/) { query }}"
end

.retrieve_details(url) ⇒ Object

Returns a Whitepaper::Paper by reading the direct page for a particular paper.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/whitepaper/engine/ieeexplore.rb', line 31

# Returns a Paper built from the metadata found on the direct page for a
# particular paper.
#
# The page at +url+ is fetched with a fresh Mechanize agent and its
# <meta property="..."> tags are read: "citation_title", "citation_date",
# "citation_conference", "keywords" (split on ";") and every
# "citation_author". A tag that is absent yields an empty string (or an empty
# list for authors and keywords).
#
# @param url [String] the address of the paper's page
# @return [Paper] a Paper created from the title, the authors and an options
#   hash; :description is always "" and :pdf_urls / :ps_urls are always empty
def retrieve_details(url)
  @agent = Mechanize.new
  page = @agent.get url

  # Content of the first <meta property="name"> tag, or "" when there is none.
  meta_content = lambda do |name|
    node = page.search("//meta[@property=\"#{name}\"]").first
    node.nil? ? "" : node.attribute("content").to_s
  end

  title      = meta_content.call("citation_title")
  conference = meta_content.call("citation_conference")
  keywords   = meta_content.call("keywords").split(';').map(&:strip)

  # The year is taken to be the last four characters of the date. A value
  # shorter than four characters (including "") is kept as-is rather than
  # turning into nil.
  date = meta_content.call("citation_date")
  year = date[-4..-1] || date

  authors = page.search("//meta[@property=\"citation_author\"]").map do |node|
    node.attribute("content").to_s.strip
  end

  Paper.new title, authors, {:description  => "",
                             :keywords     => keywords,
                             :year         => year,
                             :conference   => conference,
                             :metadata_url => url,
                             :pdf_urls     => [],
                             :ps_urls      => []}
end