Class: Scrape

Inherits:
Object
  • Object
show all
Defined in:
lib/musicscrape.rb

Instance Method Summary collapse

Constructor Details

#initialize(source_type = :web) ⇒ Scrape

Scrape is meant to parse www.thestranger.com/music and pull out info on the recommended shows. Initialize with source_type = :web and url = "www.thestranger.com/music".



18
19
20
21
# File 'lib/musicscrape.rb', line 18

def initialize(source_type = :web)
  # Sets up a scraper.
  # source_type - load_page fetches @url when this is :web and reads the
  #               bundled stranger.txt for any other value (defaults to :web).
  @url = "http://www.thestranger.com/seattle/Music"
  @source_type = source_type
end

Instance Method Details

#formatted_output ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/musicscrape.rb', line 22

def formatted_output
  # Prints every listing to stdout: title, venue, date and details on their
  # own lines, followed by a blank line (same output as the command line).
  # Calls get_music_listings on this instance rather than building a
  # throwaway Scrape object just to make the call.
  # Returns the array of listing hashes that was printed.
  get_music_listings.each do |listing|
    puts listing[:title]
    puts listing[:venue]
    puts listing[:date]
    puts listing[:details]
    puts "\n"
  end
end

#get_events(text_in) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/musicscrape.rb', line 61

def get_events(text_in)
  # Walks the pared-down html and collects one hash per event.
  # Returns [{:title=>"", :venue=>"", :date=>"", :details=>""}, ...]
  # The unparsed remainder is kept in @music_text while scanning.
  #
  # Each field is the text sitting in front of its delimiter, in this order.
  field_delimiters = [[:title, '</a><br/>'], [:venue, '<br />'], [:date, '<br />'], [:details, '</li>']]
  events = []
  @music_text = text_in
  # keep going for as long as another event link is left in the text
  while @music_text.include?("http://www.thestranger.com/seattle/Event?event=")
    # discard everything up to and including the rest of the event link's line
    _, _, @music_text = @music_text.partition(/\bevent=\b\d*.*/)
    event = {}
    field_delimiters.each do |field, delimiter|
      value, _, @music_text = @music_text.partition(delimiter)
      event[field] = value.strip
    end
    # collapse the whitespace-separated detail words into a comma-separated list
    event[:details] = event[:details].split.join(",")
    events << event
  end
  events
end

#get_music_listings ⇒ Object



34
35
36
37
38
39
# File 'lib/musicscrape.rb', line 34

def get_music_listings
  # Loads the page, trims it to the recommended-events section and parses it.
  # Runs on this instance, so the source_type given to the constructor decides
  # where load_page reads from (previously a fresh Scrape.new(:web) was always
  # used and the receiver's source_type was ignored).
  # Returns an array of hashes, one per event (see get_events).
  @pruned_page = remove_excess(load_page)
  get_events(@pruned_page)
end

#load_page ⇒ Object



41
42
43
44
45
46
47
48
# File 'lib/musicscrape.rb', line 41

def load_page
  # Returns the listings page.
  # When the source type is :web the page at @url is fetched with RestClient;
  # for any other source type the local test file is returned via open_local,
  # so the file-reading logic lives in one place only.
  return RestClient.get(@url) if @source_type == :web

  open_local
end

#open_local ⇒ Object



56
57
58
59
# File 'lib/musicscrape.rb', line 56

def open_local
  # Reads stranger.txt from the directory of this source file and returns its
  # contents as a string (used for testing without hitting the network).
  # File.read is used instead of Kernel#open, which is not meant for plain
  # file reads and is flagged by linters.
  File.read("#{File.dirname(__FILE__)}/stranger.txt")
end

#remove_excess(text_in) ⇒ Object



50
51
52
53
54
# File 'lib/musicscrape.rb', line 50

def remove_excess(text_in)
  # Cuts the page down to the recommended events section: everything in front
  # of the section heading is dropped, and the text is cut off right after the
  # first "<li class=" that follows. Returns "" when the heading is absent.
  _before, heading, after_heading = text_in.partition("<h2 class=\"sitesection\">Recommended Music Events</h2>")
  section = heading + after_heading
  kept, marker, _discarded = section.partition("<li class=")
  (kept + marker).strip
end