Class: Imdb
- Inherits:
-
Object
- Object
- Imdb
- Defined in:
- lib/imdb/imdb.rb
Constant Summary collapse
- IMDB_MOVIE_BASE_URL =
"http://www.imdb.com/title/"
- IMDB_NAME_BASE_URL =
"http://www.imdb.com/name/"
- IMDB_COMPANY_BASE_URL =
"http://www.imdb.com/company/"
- IMDB_GENRE_BASE_URL =
"http://www.imdb.com/Sections/Genres/"
- IMDB_SEARCH_BASE_URL =
"http://imdb.com/find?s=all&q="
Class Method Summary collapse
-
.find_movie_by_id(id, fetch_releaseinfos = false) ⇒ Object
Returns an ImdbMovie.
-
.search_movies_by_title(title, use_akas = nil) ⇒ Object
Returns an Array of Hashes, each of the form :imdb_id => String of the imdb-id, :title => String of the title.
Class Method Details
.find_movie_by_id(id, fetch_releaseinfos = false) ⇒ Object
Returns an ImdbMovie.
If fetch_releaseinfos is set, alternative titles and the full release dates will be fetched.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/imdb/imdb.rb', line 30
#
# Downloads the IMDb title page for +id+ and scrapes it into an ImdbMovie.
#
# id                 - String appended verbatim to IMDB_MOVIE_BASE_URL.
# fetch_releaseinfos - when truthy, parse_releaseinfo(id, movie) is also
#                      called before the movie is returned.
#
# Returns the populated ImdbMovie.
# Raises NoMethodError when the page has no <meta name="title"> element;
# errors raised by open are not rescued here.
#
# NOTE(review): calling Kernel#open with a URL relies on open-uri patching
# Kernel#open, which Ruby 3.0 removed - switch to URI.open when upgrading.
def self.find_movie_by_id(id, fetch_releaseinfos = false)
  coder = HTMLEntities.new
  # Block form so the handle returned by open is closed once parsing is done.
  data = open(IMDB_MOVIE_BASE_URL + id) { |page| Hpricot(page) }

  movie = ImdbMovie.new
  movie.imdb_id = id

  # Remove the parenthesised year / type marker, e.g. "(1999)", "(1999/I)", "(TV)".
  raw_title = data.at("meta[@name='title']")['content']
  movie.title = coder.decode(raw_title.gsub(/\((\d{4}(\/[^)]*)?|[A-Z]+)\)/, '').strip)

  # NOTE(review): the attribute name was lost in the extracted listing;
  # `rating` is presumed from the div.rating selector - confirm against ImdbMovie.
  rating_text = (data/"div.rating/div.meta/b").inner_text
  if rating_text =~ /([\d\.]+)\/10/
    movie.rating = $1
  end

  # A page without a poster simply yields nil; no exception is needed for that.
  poster = data.at("div.photo/a[@name='poster']/img")
  movie.poster_url = poster ? poster['src'] : nil

  (data/"div.info").each do |info|
    info_title = (info/"h5").inner_text
    case info_title
    when /Directors?:/
      movie.directors = parse_names(info)
    when /Writers?[^:]+:/
      movie.writers = parse_names(info)
    when /Company:/
      movie.company = parse_company(info)
    when "Tagline:"
      movie.tagline = coder.decode(parse_info(info).strip)
    when "Runtime:"
      runtime = parse_info(info).strip
      # Drop a leading "<label>: " prefix and anything following the first "min ".
      movie.runtime = runtime.gsub(/^[^:]+:\s*/, '').gsub(/min .*/, 'min')
    when "Plot:"
      plot = parse_info(info).strip
      # Strip the trailing navigation links, in the same order as before.
      [
        /\s*\|\s*add synopsis$/,
        /\s*\|\s*full synopsis$/,
        /\s*\|\s*add summary$/,
        /full summary$/,
        /more$/
      ].each { |trailer| plot = plot.gsub(trailer, '') }
      movie.plot = coder.decode(plot.strip)
    when "Genre:"
      movie.genres = parse_genres(info)
    when "Release Date:"
      begin
        if parse_info(info).strip =~ /(\d{1,2}) ([a-zA-Z]+) (\d{4})/
          movie.release_date = Date.parse("#{$2} #{$1}, #{$3}")
        end
      rescue StandardError
        # Best effort: a date that cannot be parsed leaves release_date nil.
        movie.release_date = nil
      end
    when "Certification:"
      # First USA certification that is not "Unrated", without the "USA:" prefix.
      usa = (info/"a").map { |v| v.inner_html }.find { |v| v =~ /^USA:/ && v !~ /Unrated/ }
      movie.certification = usa ? usa.sub(/^USA:/, '').strip : nil
    end
  end

  (data/"table.cast"/"tr").each do |cast_member|
    name_cell = cast_member/"td.nm"
    # The id is the path segment after "name/" in the cell's link; nil when absent.
    actor_id = name_cell.inner_html[/name\/([^"]+)\//, 1]
    actor_name = coder.decode((name_cell/"a").inner_text)
    actor_role = coder.decode((cast_member/"td.char").inner_text)
    movie.actors = movie.actors << ImdbName.new(actor_id, actor_name, actor_role)
  end

  parse_releaseinfo(id, movie) if fetch_releaseinfos

  movie
end
.search_movies_by_title(title, use_akas = nil) ⇒ Object
Returns an Array of Hashes, each of the form :imdb_id => String of the imdb-id, :title => String of the title.
If use_akas is set, alternative titles will be included in the search.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/imdb/imdb.rb', line 12
#
# Searches IMDb for +title+ and returns the matching titles.
#
# title    - String search term; it is CGI-escaped before being put in the URL.
# use_akas - when truthy, ";site=aka" is appended to the search URL.
#
# Returns an Array of Hashes of the form
# {:imdb_id => String, :title => String}. The Array is empty when the
# response is neither a result list nor a page carrying a title link and a
# title meta tag.
#
# NOTE(review): calling Kernel#open with a URL relies on open-uri patching
# Kernel#open, which Ruby 3.0 removed - switch to URI.open when upgrading.
def self.search_movies_by_title(title, use_akas = nil)
  url = "#{IMDB_SEARCH_BASE_URL}#{CGI.escape(title)};s=tt#{";site=aka" if use_akas}"
  # Block form so the handle returned by open is closed after reading.
  document = Hpricot(open(url) { |page| page.read })

  if document.search('title').inner_text == "IMDb Title Search"
    # We got a list of search results: one hash per non-empty title link.
    # NOTE(review): the method called on innerHTML was lost in the extracted
    # listing; `strip_tags` is presumed (a String extension defined elsewhere,
    # like `unescape_html`) - confirm against the project source.
    results = document.search('a[@href^="/title/tt"]').reject do |element|
      element.innerHTML.strip_tags.empty?
    end.map do |element|
      {:imdb_id => element['href'][/tt\d+/], :title => element.innerHTML.strip_tags.unescape_html}
    end
    results.uniq
  else
    # No result list: treat the response as a single title page.
    link = document.search('link[@href^="http://www.imdb.com/title/tt"]').first
    meta = document.search('meta[@name="title"]').first
    # Without both elements there is nothing to report; previously this raised
    # NoMethodError on nil.
    return [] unless link && meta

    [{:imdb_id => link['href'].match(/tt\d+/).to_s,
      :title => meta["content"].gsub(/\(\d\d\d\d\)$/, '').strip}]
  end
end