Class: Marvel101::Scraper
- Inherits: Object
- Inheritance hierarchy: Object → Marvel101::Scraper
- Defined in:
- lib/marvel_101/scraper.rb
Instance Attribute Summary
-
#doc ⇒ Object
Returns the value of attribute doc.
-
#topic ⇒ Object
Returns the value of attribute topic.
Instance Method Summary
- #get_101 ⇒ Object
- #get_description ⇒ Object
- #get_details ⇒ Object
- #get_doc ⇒ Object
- #get_item_cards ⇒ Object
- #get_items(item_cards) ⇒ Object
- #get_members ⇒ Object
- #get_wiki ⇒ Object
-
#initialize(topic) ⇒ Scraper
constructor
A new instance of Scraper.
- #scrape_list ⇒ Object
- #scrape_topic ⇒ Object
Constructor Details
#initialize(topic) ⇒ Scraper
Returns a new instance of Scraper.
5 6 7 8 |
# File 'lib/marvel_101/scraper.rb', line 5

# Creates a scraper bound to the given topic.
#
# @param topic [Object] object exposing a +urls+ hash with a +:url+ entry;
#   it is kept in @topic and the +:url+ value is kept in @url
def initialize(topic)
  @url = topic.urls[:url]
  @topic = topic
end
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
3 4 5 |
# File 'lib/marvel_101/scraper.rb', line 3

# Reader for the @doc instance variable.
#
# @return [Object, nil] the current value of @doc (nil until it is assigned)
def doc
  @doc
end
#topic ⇒ Object
Returns the value of attribute topic.
3 4 5 |
# File 'lib/marvel_101/scraper.rb', line 3

# Reader for the @topic instance variable.
#
# @return [Object, nil] the current value of @topic (nil until it is assigned)
def topic
  @topic
end
Instance Method Details
#get_101 ⇒ Object
77 78 79 80 81 82 83 |
# File 'lib/marvel_101/scraper.rb', line 77

# Reads the text of the script inside div#MarvelVideo101, extracts the
# +videoId+ value from it and stores the matching YouTube watch URL in
# topic.urls[:url_101].
#
# Nothing is stored when the script text is empty or contains no +videoId+
# entry.
#
# @return [String, nil] the stored URL, or nil when no video id was found
def get_101
  script_text = doc.css("div#MarvelVideo101 script").text
  # String#[] with a capture index yields nil when the pattern does not
  # match, whereas match(...)[1] would raise NoMethodError on nil.
  video_id = script_text[/videoId: .([-\w]*)./, 1]
  return unless video_id

  topic.urls[:url_101] = "https://www.youtube.com/watch?v=#{video_id}"
end
#get_description ⇒ Object
47 48 49 50 51 52 53 |
# File 'lib/marvel_101/scraper.rb', line 47

# Reads the text of the second paragraph inside div.featured-item-desc,
# flattens it onto one line and stores it in topic.description.
#
# Each line break, the whitespace after it and an optional following
# "more"/"less" word are replaced by a single space (presumably these are
# the page's expand/collapse link labels -- confirm against the markup).
# The words are matched literally so that ordinary words such as "loss" or
# "mess" at the start of a line are no longer deleted.
#
# The characters "â" and "Â" are rewritten/removed afterwards; this looks
# like a workaround for mis-decoded punctuation -- TODO confirm.
#
# @return [String, nil] the stored description, or nil when the paragraph
#   is blank (topic.description is then left untouched)
def get_description
  raw_text = doc.css("div.featured-item-desc p:nth-child(2)").text
  return if raw_text.strip.empty?

  flattened = raw_text.gsub(/\r?\n\s*(?:more|less)?/, " ").strip
  topic.description = flattened.gsub("â", "'").gsub("Â", "")
end
#get_details ⇒ Object
66 67 68 69 70 71 72 73 74 75 |
# File 'lib/marvel_101/scraper.rb', line 66

# Resets topic.details to an empty hash and fills it from the nested divs
# found under div.featured-item-meta.
#
# For every nested div:
# * the key is the <strong> text, lower-cased, with whitespace runs turned
#   into underscores, converted to a Symbol;
# * the value is the stripped text of the span inside the last <p>, or,
#   when that is empty, the stripped text of the last <p> itself.
#
# The characters "â" and "Â" are rewritten/removed from each value; this
# looks like a workaround for mis-decoded punctuation -- TODO confirm.
#
# @return [Object] the node collection that was iterated
def get_details
  topic.details = {}
  doc.css("div.featured-item-meta").css("div div").each do |raw_detail|
    label = raw_detail.css("strong").text.downcase.split.join("_")
    value = raw_detail.css("p:last-child span").text.strip
    # The fallback is stripped as well so both paths yield trimmed values.
    value = raw_detail.css("p:last-child").text.strip if value.empty?
    topic.details[label.to_sym] = value.gsub("â", "'").gsub("Â", "")
  end
end
#get_doc ⇒ Object
23 24 25 |
# File 'lib/marvel_101/scraper.rb', line 23

# Downloads the page at @url, parses it with Nokogiri::HTML and keeps the
# result in @doc.
#
# URI.open (from open-uri) is used instead of Kernel#open: since Ruby 3.0
# Kernel#open no longer fetches URLs, and it treats strings starting with
# "|" as commands to run. The block form closes the handle once parsing is
# done.
#
# @return [Object] the parsed document now stored in @doc
# @raise whatever URI.open raises when the page cannot be fetched
def get_doc
  @doc = URI.open(@url) { |page| Nokogiri::HTML(page) }
end
#get_item_cards ⇒ Object
27 28 29 30 |
# File 'lib/marvel_101/scraper.rb', line 27

# Selects the item cards of a list page.
#
# Looks for div.row-item elements under div#comicsListing first and falls
# back to those under #featured-chars when the first query finds nothing.
#
# @return [Object] the matching node collection (may itself be empty)
def get_item_cards
  listing_cards = doc.css("div#comicsListing div.row-item")
  return listing_cards unless listing_cards.empty?

  doc.css("#featured-chars div.row-item")
end
#get_items(item_cards) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/marvel_101/scraper.rb', line 32

# Turns the title links inside the given cards into list entries and stores
# them in topic.items.
#
# Each link's href is prefixed with "http:". When @url contains "team"
# (case-insensitively) the entry comes from Marvel101::Team and its name is
# prefixed with "The "; otherwise it comes from Marvel101::Character. Every
# entry gets its +list+ set to the current topic.
#
# @param item_cards [Object] node collection responding to +css+
# @return [Array] the entries assigned to topic.items
def get_items(item_cards)
  title_links = item_cards.css("div.row-item-text > h5 > a")
  topic.items = title_links.map do |link|
    name = link.text.strip
    url = "http:#{link.attr("href")}"
    entry =
      if @url.downcase.include?("team")
        Marvel101::Team.find_or_create_by_name("The #{name}", url)
      else
        Marvel101::Character.find_or_create_by_name(name, url)
      end
    entry.list = topic
    entry
  end
end
#get_members ⇒ Object
55 56 57 58 59 60 61 62 63 64 |
# File 'lib/marvel_101/scraper.rb', line 55

# Builds the member list of a team topic from the first div.grid-container
# on the page and stores it in topic.members.
#
# Each div.row-item card contributes one Marvel101::Character, looked up via
# find_or_create_by_name with the card's a.meta-title text and its href
# prefixed with "http:". Every member gets +list+ set to topic.list and
# +team+ set to the topic.
#
# When the page has no div.grid-container, topic.members becomes an empty
# array instead of the method raising NoMethodError on nil.
#
# @return [Array] the members assigned to topic.members
def get_members
  members_grid = doc.css("div.grid-container").first
  cards = members_grid ? members_grid.css("div.row-item") : []

  topic.members = cards.map do |card|
    # Query the title anchor once and reuse it for both name and href.
    title_link = card.css("a.meta-title")
    name = title_link.text.strip
    url = "http:#{title_link.attr("href").value}"
    Marvel101::Character.find_or_create_by_name(name, url).tap do |member|
      member.list, member.team = topic.list, topic
    end
  end
end
#get_wiki ⇒ Object
85 86 87 88 |
# File 'lib/marvel_101/scraper.rb', line 85

# Stores the href of the a.featured-item-notice.primary link found inside
# div.title-section in topic.urls[:url_wiki]. Does nothing when the page
# has no such link.
#
# @return [String, nil] the stored href, or nil when no link was found
def get_wiki
  wiki_anchor = doc.css("div.title-section a.featured-item-notice.primary")
  return if wiki_anchor.empty?

  topic.urls[:url_wiki] = wiki_anchor.attr("href").value
end
#scrape_list ⇒ Object
10 11 12 13 |
# File 'lib/marvel_101/scraper.rb', line 10

# Scrapes a list page: loads the document via get_doc, then passes the
# cards returned by get_item_cards to get_items.
#
# @return [Object] whatever get_items returns
def scrape_list
  get_doc
  cards = get_item_cards
  get_items(cards)
end
#scrape_topic ⇒ Object
15 16 17 18 19 20 21 |
# File 'lib/marvel_101/scraper.rb', line 15

# Scrapes a single topic page. Runs, in order: get_doc, get_description,
# then get_members when topic.team? is truthy or get_details otherwise,
# then get_101 and get_wiki.
#
# @return [Object] whatever get_wiki returns
def scrape_topic
  get_doc
  get_description
  if topic.team?
    get_members
  else
    get_details
  end
  get_101
  get_wiki
end