Class: ImdbCelebrity::Parser::HpricotParser::SearchParser

Inherits:
HpricotParser
  • Object
show all
Defined in:
lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb

Instance Attribute Summary

Attributes inherited from HpricotParser

#page

Instance Method Summary collapse

Constructor Details

#initialize(uri) ⇒ SearchParser

Returns a new instance of SearchParser.



7
8
9
# File 'lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb', line 7

# Creates a SearchParser by handing the given location straight to the
# parent class initializer; no additional state is set up here.
#
# @param uri [Object] forwarded unchanged to the superclass constructor
def initialize(uri)
  super(uri)
end

Instance Method Details

#exact_match? ⇒ Boolean

Returns true if the search returned exactly one result (an exact match).

Returns:

  • (Boolean)


12
13
14
15
# File 'lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb', line 12

# Tells whether the fetched page contains an "Overview" h3 heading.
#
# The lookup asks the document for the parent of an h3 matched by
# text()^='Overview' (presumably a "starts with" match in the selector
# dialect used by the document object -- confirm against the parser library).
# NOTE(review): the surrounding docs treat the presence of that node as the
# sign of a single, exact search hit.
#
# @return [Boolean] true when the node is found, false when the lookup yields nil
def exact_match?
  overview_container = document.at("//h3[text()^='Overview']/..")
  !overview_container.nil?
end

#parse_celebrities(parser) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb', line 17

# Builds one entry per usable link whose href starts with "/name/nm".
#
# A link is skipped when its inner markup has no text left after tag
# stripping, or when its parent's markup matches "media from"
# (case-insensitive).
#
# For each remaining link the parent's markup is split on "<br />". The title
# is taken from the first segment, unless the first segment mentions "img"
# and a second segment exists, in which case the second segment is used.
# Tags are stripped, HTML is unescaped, and a trailing " (YYYY)" suffix is
# removed from the title.
#
# @param parser [Object] carried through untouched as the last item of every entry
# @return [Array<Array>] entries shaped as [id, title, parser], where id is
#   the first run of digits found in the link's href
def parse_celebrities(parser)
  name_links = document.search('a[@href^="/name/nm"]')

  usable_links = name_links.reject do |link|
    link.innerHTML.imdb_strip_tags.empty? || link.parent.innerHTML =~ /media from/i
  end

  usable_links.map do |link|
    id = link['href'][/\d+/]

    segments = link.parent.innerHTML.split("<br />")
    leading = segments[0]
    following = segments[1]

    # An image in the first segment means the textual name sits in the next one.
    image_first = !leading.nil? && !following.nil? && leading =~ /img/
    raw_title = image_first ? following : leading

    title = raw_title.imdb_strip_tags.imdb_unescape_html
    # In-place edit; the return value (nil when nothing matched) is deliberately ignored.
    title.gsub!(/\s+\(\d{4}\)$/, '')

    [id, title, parser]
  end
end