Class: Gandalf::Worker

Inherits:
Object
  • Object
show all
Includes:
DataMapper::Resource
Defined in:
lib/gandalf/worker.rb

Overview

A magical slave

Instance Method Summary collapse

Instance Method Details

#crawl(jobs) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
# File 'lib/gandalf/worker.rb', line 35

# Fetches the feeds for a batch of jobs in a single Feedzirra call and
# dispatches each result: feeds parsed as RSS have their posts saved, and
# anything else is handed to #handle_error.
#
# @param jobs [Hash] jobs keyed by feed URL; each job's :id is passed to
#   #save_posts as the channel id
# @return [Object] the result of iterating +jobs+ (the receiver itself for a Hash)
def crawl(jobs)
  fetched = Feedzirra::Feed.fetch_and_parse(jobs.keys)

  jobs.each do |url, job|
    feed = fetched[url]

    # Only results that are RSS parser instances count as a success here.
    if feed.is_a?(Feedzirra::Parser::RSS)
      save_posts(feed, job[:id])
    else
      handle_error(job)
    end
  end
end

#handle_error(job) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/gandalf/worker.rb', line 60

# Records a failed crawl attempt on the job and decides whether to retry it.
#
# The job's :errors counter is incremented (or started at 1 when it is not
# yet an integer). Once the counter reaches +max_errors+ the job is printed
# and not re-queued; otherwise it is pushed back onto the queue.
#
# @param job [Hash] the job hash; its :errors entry is updated in place
# @return [Object] the result of +puts+ or of +@queue.push+
def handle_error(job)
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
  # in 3.2, where referencing it raises NameError.
  job[:errors] = job[:errors].is_a?(Integer) ? job[:errors] + 1 : 1

  if job[:errors] >= max_errors
    puts job
  else
    @queue.push(job)
  end
end

#jobs_to_do ⇒ Object



74
75
76
# File 'lib/gandalf/worker.rb', line 74

# Reports the length of this worker's job queue.
#
# @return [Object] whatever +@queue.length+ returns — presumably the number
#   of pending jobs; confirm against the queue implementation
def jobs_to_do
  @queue.length
end

#new_jobs(count) ⇒ Object



84
85
86
87
88
89
90
91
# File 'lib/gandalf/worker.rb', line 84

# Pops up to +count+ jobs off the queue and indexes them by their :url.
#
# @param count [Object] passed straight through to +@queue.pop_first+
# @return [Hash] the popped jobs keyed by +job[:url]+; when two jobs share
#   a URL the later one wins
def new_jobs(count)
  @queue.pop_first(count).each_with_object({}) do |entry, by_url|
    by_url[entry[:url]] = entry
  end
end

#push(jobs) ⇒ Object



78
79
80
81
82
# File 'lib/gandalf/worker.rb', line 78

# Enqueues each of the given jobs, in iteration order, onto the worker's queue.
#
# @param jobs [#each] the jobs to push onto +@queue+ one at a time
# @return [Object] the result of +jobs.each+ (the receiver itself for an Array)
def push(jobs)
  jobs.each { |item| @queue.push(item) }
end

#run ⇒ Object



20
21
22
23
24
25
# File 'lib/gandalf/worker.rb', line 20

# Starts periodic crawling. A Rufus scheduler is created on first use and
# reused afterwards; every +interval+ it pops up to +max_jobs+ jobs via
# #new_jobs and passes them to #crawl.
#
# @return [Object] whatever the scheduler's +every+ call returns
def run
  @crawl_scheduler ||= Rufus::Scheduler.start_new

  @crawl_scheduler.every(interval) do
    crawl(new_jobs(max_jobs))
  end
end

#save_posts(feed, channel_id) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/gandalf/worker.rb', line 47

# Parses a feed into posts with the configured post class, then tags,
# cleans and saves each one in order.
#
# Saving stops at the first post whose +save+ raises MysqlError; the
# remaining posts are left untouched.
# NOTE(review): presumably the error signals an already-stored post — confirm.
#
# @param feed [Object] the parsed feed handed to +@Post.parse+
# @param channel_id [Object] assigned to each post's +channel_id+
# @return [Object, nil] the parsed posts when all were saved, nil when the
#   loop was cut short by a MysqlError
def save_posts(feed, channel_id)
  @Post.parse(feed).each do |post|
    post.channel_id = channel_id
    post.clean!

    begin
      post.save
    rescue MysqlError
      break
    end
  end
end

#setup(options = {:post_class => Post}) ⇒ Object



15
16
17
18
# File 'lib/gandalf/worker.rb', line 15

# Prepares the worker: creates its Redis-backed queue (once) and records
# which class is used to parse feeds into posts.
#
# @param options [Hash]
#   :redis      - passed through to RedisQueue.new
#   :post_class - class used by #save_posts; falls back to Post when absent
# @return [Object] the post class now in use
def setup(options = {:post_class => Post})
  # The queue is keyed by this worker's id and kept across repeated calls.
  @queue ||= RedisQueue.new(:key => id, :redis => options[:redis])

  # The default argument only applies when no hash is passed at all, so a
  # caller supplying just :redis would otherwise leave @Post nil.
  @Post = options[:post_class] || Post
end

#stop ⇒ Object



27
28
29
# File 'lib/gandalf/worker.rb', line 27

# Stops the crawl scheduler created by #run.
#
# Does nothing when no scheduler has been created yet, instead of raising
# NoMethodError on nil.
#
# @return [Object, nil] the result of the scheduler's +stop+, or nil when
#   there is no scheduler
def stop
  @crawl_scheduler.stop if @crawl_scheduler
end