Module: JobParser

Defined in:
lib/jobparser.rb,
lib/jobparser/cache.rb,
lib/jobparser/error.rb,
lib/jobparser/regex.rb,
lib/jobparser/parser.rb,
lib/jobparser/scorer.rb,
lib/jobparser/cleaner.rb,
lib/jobparser/version.rb,
lib/jobparser/parsehtml.rb,
lib/jobparser/parseschema.rb,
lib/jobparser/facets/apply.rb,
lib/jobparser/facets/facet.rb,
lib/jobparser/facets/title.rb,
lib/jobparser/specialcases.rb,
lib/jobparser/facets/salary.rb,
lib/jobparser/cache/textfile.rb,
lib/jobparser/facets/deadline.rb,
lib/jobparser/facets/location.rb,
lib/jobparser/facets/postcode.rb,
lib/jobparser/cache/mongostore.rb,
lib/jobparser/facets/salarystring.rb

Defined Under Namespace

Modules: Error, Facets
Classes: Cache, Cleaner, Match, ParseHtml, ParseSchema, Parser, Scorer, SpecialCases

Constant Summary collapse

SALARY_REGEX =
/£[\d,]*(?:.+)£[\d,]*/
SALARY_STRING_REGEX =
/£[\d,]*.+£[\d,]*(\s.*$)?/
SALARY_UP_TO_REGEX =
/(up to)(.+)£([\d,]*)/
SALARY_TITLE_REGEX =
/salary|\srate/i
VACANCY_TITLE_REGEX =
/vacancy|job title/i
JOB_TITLE_ID_REGEX =
/job(.?)title|title/i
/^apply|submit an application|application form/i
NBSP =
Nokogiri::HTML(" ").text
LOCATION_REGEX =
/(?:location: )([\D]*)$/i
SALARY_GROUP_REGEX =
/£([\d,]*)(?:.+)£([\d,]*)/
CLEAN_SALARY_REGEX =
/,|\s/
POSTCODE_REGEX =
/([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]?\s?[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)/
JOB_TITLE_WORDS =

Words commonly used in job listings. Not sure if this is a good way to go, but I think it's worth a try. Could scope this regex just to headers.

/representative|sales|nurse|manager|assistant/i
VERSION =
"0.15.1"

Class Method Summary collapse

Class Method Details

.cache ⇒ Object



53
54
55
# File 'lib/jobparser.rb', line 53

class << self
  # Reader for the module-level @cache instance variable.
  #
  # @return [Object, nil] whatever is currently stored in @cache
  #   (nil when nothing has been assigned)
  attr_reader :cache
end

.config ⇒ Object



49
50
51
# File 'lib/jobparser.rb', line 49

class << self
  # Reader for the module-level @config instance variable.
  #
  # @return [Object, nil] whatever is currently stored in @config
  #   (nil when nothing has been assigned)
  attr_reader :config
end

.configure(opts = {}) ⇒ Object



57
58
59
60
61
# File 'lib/jobparser.rb', line 57

# Overrides existing configuration entries with the values given in +opts+.
#
# Only keys already present in @config are written; any other option is
# ignored. Keys are converted with +to_sym+ before the lookup, so "name" and
# :name address the same entry. Keys that cannot be converted to a Symbol can
# never match an existing entry and are skipped like any other unknown option.
#
# @param opts [Hash] option names mapped to their new values
# @return [Object] the value returned by +opts.each+ (for a Hash, +opts+ itself)
def self.configure(opts = {})
  opts.each do |key, val|
    next unless key.respond_to?(:to_sym)

    name = key.to_sym
    # key? is a direct hash lookup; no intermediate keys array per option.
    @config[name] = val if @config.key?(name)
  end
end

.parse(url) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/jobparser.rb', line 31

# Builds a parser object for the job listing at +url+.
#
# If the cache reports a valid entry for the URL, the cached result's :schema
# flag picks the parser class and no HTML is passed to it (nil). Otherwise the
# page is downloaded and the parser class is chosen by whether the markup
# contains the schema.org JobPosting identifier.
#
# NOTE(review): URI.encode and URL-opening via Kernel#open are no longer
# available on Ruby 3.0+; left in place because the Ruby version this file
# targets is not visible here - confirm before upgrading.
#
# @param url [String] address of the job listing
# @return [ParseSchema, ParseHtml] parser for the listing
# @raise [JobParser::Error::InvalidUrl] when URI::InvalidURIError is raised
#   while fetching the page or constructing the parser
def self.parse(url)
  if JobParser.cache.valid_for_url?(url)
    res = JobParser.cache.fetch_result_for_url(url)
    return res[:schema] ? ParseSchema.new(nil, url) : ParseHtml.new(nil, url)
  end

  begin
    # Block form so the handle is closed as soon as the body has been read,
    # instead of staying open until garbage collection.
    # :allow_redirections is presumably supplied by an open-uri extension
    # loaded elsewhere in the project - verify.
    html = open(URI.encode(url), :allow_redirections => :safe) { |page| page.read }
    if html.include?("http://schema.org/JobPosting")
      ParseSchema.new(html, url)
    else
      ParseHtml.new(html, url)
    end
  rescue URI::InvalidURIError
    raise JobParser::Error::InvalidUrl, "The URI given (\"#{url}\") was not valid"
  end
end

.parser(url) ⇒ Object



26
27
28
29
# File 'lib/jobparser.rb', line 26

# Old entry point kept for callers that still use it: prints a notice on
# standard output and then hands the URL to JobParser.parse.
#
# @deprecated Use JobParser.parse instead.
# @param url [Object] passed through unchanged to JobParser.parse
# @return [Object] whatever JobParser.parse returns
def self.parser(url)
  $stdout.puts("Warning: JobParser.parser is old. Use JobParser.parse")
  JobParser.parse(url)
end