Class: DiceSearch

Inherits:
Object
  • Object
show all
Defined in:
lib/dice/dice.rb

Overview

Class to screen-scrape the Dice job-search website

seeker.dice.com

Constant Summary collapse

DEBUG =

Constants

false
TITLE_CELL =
2
COMPANY_CELL =
3
LOCATION_CELL =
4
DATE_CELL =
5
CELL_COUNT =
6
DICE_LINK =
"http://seeker.dice.com"

Class Method Summary collapse

Class Method Details

.get_listings(url) ⇒ Object

Retrieve the job listings

Parameters:

  • url (String, #read)

    the url used to query the data

Returns:

  • (Array<JobListing>)

    an array of job listings

[View source]

81
82
83
84
85
86
87
88
89
90
91
# File 'lib/dice/dice.rb', line 81

# Download the page at +url+ and parse the job listings out of it.
#
# @param url [String] the url used to query the data
# @return [Object] whatever parse_listings returns for the downloaded page
def self.get_listings(url)

	# Read the data from the url. The Referer is Dice's own job search page
	# (the same value the query method sends), not an unrelated job site.
	response = open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
				"From" => "email@addr.com",
				"Referer" => "http://seeker.dice.com/jobsearch/").read

	# Parse the listings from the downloaded page
	parse_listings(response)

end

.parse_listings(query) ⇒ Object

Parse the provided query data

Parameters:

  • query (String, #read)

    the html web page data

[View source]

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/dice/dice.rb', line 24

# Extract the job listings from a search-results page.
#
# Every row of the table with class "summary" that has exactly CELL_COUNT
# cells is treated as a job listing; all other rows are skipped.
#
# @param query [String] the html web page data
# @return [Array<JobListing>] one entry per matching row, in page order
def self.parse_listings(query)

	# Locate the rows of the summary table with Hpricot
	summary_rows = (Hpricot(query)/"//table[@class=summary]")/"tr"

	summary_rows.each_with_object([]) { |tr, found|

		tds = (tr/"td")

		# Rows with any other cell count are not job listings
		next unless tds.size == CELL_COUNT

		# The title and company cells each wrap their text in a link
		title_anchor = (tds[TITLE_CELL]/"a")
		name = title_anchor.inner_html
		link = DICE_LINK + title_anchor.attr("href")

		company_anchor = (tds[COMPANY_CELL]/"a")
		company = company_anchor.inner_html
		company_link = DICE_LINK + company_anchor.attr("href")

		# Location and date are taken straight from their cells
		location = tds[LOCATION_CELL].inner_html
		date = tds[DATE_CELL].inner_html

		if DEBUG
			puts "Row: count #{tds.size}"
			puts "Name: #{name}"
			puts "Link: #{link}"
			puts "Company: #{company}"
			puts "Company Link: #{company_link}"
			puts "Location: #{location}"
			puts "Date: #{date}"
		end

		# Record the job listing
		found << JobListing.new(name, link, company, company_link, location, date, nil)

	}

end

.query(location, keywords, days_back, num_entries) ⇒ Object

Query Dice and return the HTML of the search results page

Parameters:

  • location (String, #read)

    the location to search

  • keywords (String, #read)

    keywords to use for the search

  • days_back (String, #read)

    how long ago to search

  • num_entries (String, #read)

    the number of entries to request

[View source]

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/dice/dice.rb', line 99

# Run a job search against Dice and return the raw HTML of the results page.
#
# The arguments are interpolated into the query string exactly as given, so
# callers must pass values that are already URL-safe.
#
# @param location [String] the location to search
# @param keywords [String] keywords to use for the search
# @param days_back [String] how long ago to search
# @param num_entries [String] the number of entries to request
# @return [String] the body read from the search URL
def self.query(location, keywords, days_back, num_entries)

	# The search URL
	url = "http://seeker.dice.com/jobsearch/servlet/JobSearch" +
		 "?caller=0&LOCATION_OPTION=2&EXTRA_STUFF=1&N=0&Hf=0" +
		 "&Ntk=JobSearchRanking&op=300" +
		 "&values=&FREE_TEXT=#{keywords}" +
		 "&Ntx=mode+matchall&WHERE=#{location}" +
		 "&WHEREList=#{location}" +
		 "&RADIUS=80.4672" +
		 "&COUNTRY=1525&STAT_PROV=0&METRO_AREA=33.78715899%2C-84.39164034&AREA_CODES=&AC_COUNTRY=1525" +
		 "&TRAVEL=0&TAXTERM=1001" +
		 "&SORTSPEC=0" +
		 "&FRMT=0" +
		 "&DAYSBACK=#{days_back}" +
		 "&NUM_PER_PAGE=#{num_entries}"

	# Read the data from the url. This must be the local `url` built above;
	# an instance variable of the same name is never assigned and would be nil.
	open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
		"From" => "email@addr.com",
		"Referer" => "http://seeker.dice.com/jobsearch/").read
end