Class: Marvel101::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/marvel_101/scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(topic) ⇒ Scraper

Returns a new instance of Scraper.



5
6
7
8
# File 'lib/marvel_101/scraper.rb', line 5

# Builds a scraper bound to a single topic.
#
# @param topic [Object] object exposing a +urls+ hash; the hash's +:url+
#   entry is remembered as the page address used by this scraper
def initialize(topic)
  @topic = topic
  @url = @topic.urls[:url]
end

Instance Attribute Details

#doc ⇒ Object

Returns the value of attribute doc.



3
4
5
# File 'lib/marvel_101/scraper.rb', line 3

# Reader for the +@doc+ instance variable (assigned by #get_doc).
#
# @return [Object, nil] whatever was last stored in +@doc+
def doc
  @doc
end

#topic ⇒ Object

Returns the value of attribute topic.



3
4
5
# File 'lib/marvel_101/scraper.rb', line 3

# Reader for the +@topic+ instance variable (assigned in #initialize).
#
# @return [Object] the topic this scraper was built for
def topic
  @topic
end

Instance Method Details

#get_101 ⇒ Object



77
78
79
80
81
82
83
# File 'lib/marvel_101/scraper.rb', line 77

# Looks up the video id in the inline script under +div#MarvelVideo101+
# and, when one is found, stores the matching YouTube watch URL in
# +topic.urls[:url_101]+.
#
# Nothing is stored when the script is absent, empty, or does not contain a
# +videoId+ entry.
#
# @return [String, nil] the stored URL, or nil when no video id was found
def get_101
  script_text = doc.css("div#MarvelVideo101 script").text
  # String#[] with a capture index returns nil when the pattern does not
  # match, so a script without a videoId is skipped instead of raising.
  id = script_text[/videoId: .([-\w]*)./, 1]
  return if id.nil?

  topic.urls[:url_101] = "https://www.youtube.com/watch?v=#{id}"
end

#get_description ⇒ Object



47
48
49
50
51
52
53
# File 'lib/marvel_101/scraper.rb', line 47

# Reads the text of the second paragraph inside +div.featured-item-desc+,
# flattens it onto one line and stores it as +topic.description+.
#
# Nothing is stored when the paragraph is missing or blank.
#
# @return [String, nil] the stored description, or nil when there was none
def get_description
  raw = doc.css("div.featured-item-desc p:nth-child(2)").text
  return if raw.strip.empty?

  # Each line break (with the indentation that follows it) becomes a single
  # space. A standalone "more"/"less" word opening the new line is dropped
  # as well; the word boundary keeps ordinary words such as "loss" or
  # "lessons" intact.
  flattened = raw.gsub(/\r?\n\s*(?:(?:more|less)\b)?/, " ").strip
  # Swap "â" for an apostrophe and drop "Â" (presumably mis-decoded
  # characters in the scraped text -- confirm against the page encoding).
  topic.description = flattened.gsub("â", "'").gsub("Â", "")
end

#get_details ⇒ Object



66
67
68
69
70
71
72
73
74
75
# File 'lib/marvel_101/scraper.rb', line 66

# Rebuilds +topic.details+ from the entries found under
# +div.featured-item-meta+.
#
# Each entry's +strong+ text, lower-cased with words joined by underscores,
# becomes the symbol key. The value is the stripped text of the span in the
# entry's last paragraph, or the whole last paragraph's text when that span
# text is empty. "â" is replaced by an apostrophe and "Â" is removed from
# the value.
def get_details
  topic.details = {}
  meta_section = doc.css("div.featured-item-meta")
  meta_section.css("div div").each do |entry|
    label_words = entry.css("strong").text.downcase.strip.split(" ")
    key = label_words.join("_").to_sym
    span_text = entry.css("p:last-child span").text.strip
    value = span_text.empty? ? entry.css("p:last-child").text : span_text
    topic.details[key] = value.gsub("â", "\'").gsub("Â", "")
  end
end

#get_doc ⇒ Object



23
24
25
# File 'lib/marvel_101/scraper.rb', line 23

# Fetches the page at +@url+, parses it with Nokogiri and keeps the parsed
# document in +@doc+.
#
# Uses +URI.open+ from open-uri: opening a URL through bare +Kernel#open+
# was deprecated in Ruby 2.7 and removed in 3.0. The block form closes the
# downloaded stream once parsing is done.
#
# @return [Object] the parsed document
def get_doc
  @doc = URI.open(@url) { |page| Nokogiri::HTML(page) }
end

#get_item_cards ⇒ Object



27
28
29
30
# File 'lib/marvel_101/scraper.rb', line 27

# Returns the row-item cards of the current page.
#
# The cards under +div#comicsListing+ are used when there are any;
# otherwise the cards under +#featured-chars+ are returned.
def get_item_cards
  listing_cards = doc.css("div#comicsListing div.row-item")
  return listing_cards unless listing_cards.empty?

  doc.css("#featured-chars div.row-item")
end

#get_items(item_cards) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/marvel_101/scraper.rb', line 32

# Turns the title links inside +item_cards+ into topic objects and stores
# them as +topic.items+.
#
# When +@url+ contains "team" (case-insensitively) every link becomes a
# Marvel101::Team named "The <name>"; otherwise it becomes a
# Marvel101::Character. Each created object gets +topic+ as its +list+.
#
# @param item_cards [#css] cards to search for +div.row-item-text > h5 > a+
# @return [Array] the objects stored in +topic.items+
def get_items(item_cards)
  links = item_cards.css("div.row-item-text > h5 > a")
  topic.items = links.map do |link|
    label = link.text.strip
    address = "http:#{link.attr("href")}"
    item =
      if @url.downcase.include?("team")
        Marvel101::Team.find_or_create_by_name("The #{label}", address)
      else
        Marvel101::Character.find_or_create_by_name(label, address)
      end
    item.list = topic
    item
  end
end

#get_members ⇒ Object



55
56
57
58
59
60
61
62
63
64
# File 'lib/marvel_101/scraper.rb', line 55

# Builds the member list of a team topic from the first
# +div.grid-container+ on the page and stores it as +topic.members+.
#
# Every +div.row-item+ card becomes a Marvel101::Character whose +list+ is
# the topic's list and whose +team+ is the topic itself. A page without a
# grid container produces an empty member list.
#
# @return [Array] the members stored in +topic.members+
def get_members
  members_grid = doc.css("div.grid-container").first
  # +first+ is nil when the page has no grid container; treat that as
  # "no members" instead of calling +css+ on nil.
  cards = members_grid ? members_grid.css("div.row-item") : []
  topic.members = cards.collect do |card|
    title_link = card.css("a.meta-title")
    name = title_link.text.strip
    url = "http:#{title_link.attr("href").value}"
    Marvel101::Character.find_or_create_by_name(name, url).tap do |member|
      member.list, member.team = topic.list, topic
    end
  end
end

#get_wiki ⇒ Object



85
86
87
88
# File 'lib/marvel_101/scraper.rb', line 85

# Stores the +href+ of the primary notice link in the title section as
# +topic.urls[:url_wiki]+. Does nothing when the page has no such link.
#
# @return [String, nil] the stored link target, or nil when no link exists
def get_wiki
  notice_links = doc.css("div.title-section a.featured-item-notice.primary")
  return if notice_links.empty?

  topic.urls[:url_wiki] = notice_links.attr("href").value
end

#scrape_list ⇒ Object



10
11
12
13
# File 'lib/marvel_101/scraper.rb', line 10

# Scrapes a list page: loads the document, collects its item cards and
# hands them to #get_items.
#
# @return [Object] whatever #get_items returns
def scrape_list
  get_doc
  cards = get_item_cards
  get_items(cards)
end

#scrape_topic ⇒ Object



15
16
17
18
19
20
21
# File 'lib/marvel_101/scraper.rb', line 15

# Scrapes a single topic page: loads the document, then fills in the
# description, the members (for a team) or the details (for anything else),
# the 101 video link and the wiki link, in that order.
#
# @return [Object] whatever #get_wiki returns
def scrape_topic
  get_doc
  get_description
  if topic.team?
    get_members
  else
    get_details
  end
  get_101
  get_wiki
end