Class: DiceSearch

Inherits:

Object

Object
DiceSearch

show all

Defined in: lib/dice/dice.rb

Overview

Class to screen-scrape the Dice website (seeker.dice.com).

Constant Summary collapse

DEBUG = Constants

false

TITLE_CELL =

COMPANY_CELL =

LOCATION_CELL =

DATE_CELL =

CELL_COUNT =

DICE_LINK =

"http://seeker.dice.com"

Class Method Summary collapse

.get_listings(url) ⇒ Object

Retrieve the job listings.
.parse_listings(query) ⇒ Object

Parse the provided query data.
.query(location, keywords, days_back, num_entries) ⇒ Object

Query dice for html code for the query.

Class Method Details

permalink .get_listings(url) ⇒ `Object`

Retrieve the job listings

Parameters:

url (String, #read) —

the url used to query the data
Returns:

(Array<JobListing>) —

an array of job listings

[View source]

# File 'lib/dice/dice.rb', line 81

# Retrieve the job listings found at the given search URL.
#
# Downloads the page at +url+ and passes the response body to
# parse_listings.
#
# @param url [String] the url used to query the data
# @return [Array<JobListing>] whatever parse_listings extracts from the page
def self.get_listings(url)

	# Read the data from the url. The Referer points at the Dice job search
	# page (the same value query sends) rather than an unrelated job site.
	response = open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
				"From" => "email@addr.com",
				"Referer" => "http://seeker.dice.com/jobsearch/").read

	# Parse the listings from the downloaded page
	parse_listings(response)

end

permalink .parse_listings(query) ⇒ `Object`

Parse the provided query data

Parameters:

query (String, #read) —

the html web page data

[View source]

# File 'lib/dice/dice.rb', line 24

# Extract job listings from the html of a search-results page.
#
# Looks at every row of the table whose class is "summary" and turns each
# row that has exactly CELL_COUNT cells into a JobListing.
#
# @param query [String] the html web page data
# @return [Array<JobListing>] one listing per matching row, in page order
def self.parse_listings(query)
	# Every <tr> inside the summary table
	summary_rows = (Hpricot(query) / "//table[@class=summary]") / "tr"

	summary_rows.each_with_object([]) do |tr, found|
		tds = tr / "td"

		# Rows with any other number of cells are not job listings
		next unless tds.size == CELL_COUNT

		# Title cell: link text is the job name, href is relative to DICE_LINK
		title_anchor = tds[TITLE_CELL] / "a"
		name = title_anchor.inner_html
		link = DICE_LINK + title_anchor.attr("href")

		# Company cell: same layout as the title cell
		company_anchor = tds[COMPANY_CELL] / "a"
		company = company_anchor.inner_html
		company_link = DICE_LINK + company_anchor.attr("href")

		# Location and date are taken as the raw cell contents
		location = tds[LOCATION_CELL].inner_html
		date = tds[DATE_CELL].inner_html

		if DEBUG
			puts "Row: count #{tds.size}"
			puts "Name: #{name}"
			puts "Link: #{link}"
			puts "Company: #{company}"
			puts "Company Link: #{company_link}"
			puts "Location: #{location}"
			puts "Date: #{date}"
		end

		# The last constructor argument is always nil here
		found << JobListing.new(name, link, company, company_link, location, date, nil)
	end
end

permalink .query(location, keywords, days_back, num_entries) ⇒ `Object`

Query dice for html code for the query

Parameters:

location (String, #read) —

the location to search
keywords (String, #read) —

keywords to use for the search
days_back (String, #read) —

how long ago to search
num_entries (String, #read) —

the number of entries to request

[View source]

# File 'lib/dice/dice.rb', line 99

# Query dice for the html code of a job search.
#
# Builds the search URL from the arguments and returns the body of the
# response. The arguments are interpolated into the query string as given,
# with no escaping applied here.
#
# @param location [String] the location to search (fills WHERE and WHEREList)
# @param keywords [String] keywords to use for the search (fills FREE_TEXT)
# @param days_back [#to_s] how long ago to search (fills DAYSBACK)
# @param num_entries [#to_s] the number of entries to request (fills NUM_PER_PAGE)
# @return [Object] the result of calling #read on the opened url
def self.query(location, keywords, days_back, num_entries)

	# The search URL. RADIUS, COUNTRY and METRO_AREA are fixed literals;
	# only the four arguments vary between calls.
	url = "http://seeker.dice.com/jobsearch/servlet/JobSearch" +
		 "?caller=0&LOCATION_OPTION=2&EXTRA_STUFF=1&N=0&Hf=0" +
		 "&Ntk=JobSearchRanking&op=300" +
		 "&values=&FREE_TEXT=#{keywords}" +
		 "&Ntx=mode+matchall&WHERE=#{location}" +
		 "&WHEREList=#{location}" +
		 "&RADIUS=80.4672" +
		 "&COUNTRY=1525&STAT_PROV=0&METRO_AREA=33.78715899%2C-84.39164034&AREA_CODES=&AC_COUNTRY=1525" +
		 "&TRAVEL=0&TAXTERM=1001" +
		 "&SORTSPEC=0" +
		 "&FRMT=0" +
		 "&DAYSBACK=#{days_back}" +
		 "&NUM_PER_PAGE=#{num_entries}"

	# Read the data from the url. This must be the local +url+ built above;
	# an instance variable of the same name is never assigned in this method.
	open(url, "User-Agent" => "Ruby/#{RUBY_VERSION}",
		"From" => "email@addr.com",
		"Referer" => "http://seeker.dice.com/jobsearch/").read
end

Class: DiceSearch

Overview

Constant Summary collapse

Class Method Summary collapse

Class Method Details

permalink .get_listings(url) ⇒ Object

permalink .parse_listings(query) ⇒ Object

permalink .query(location, keywords, days_back, num_entries) ⇒ Object

permalink .get_listings(url) ⇒ `Object`

permalink .parse_listings(query) ⇒ `Object`

permalink .query(location, keywords, days_back, num_entries) ⇒ `Object`