Class: Scrapix::VBulletin

Inherits:
Object
  • Object
show all
Defined in:
lib/scrapix/vbulletin.rb

Overview

Downloads images from a vBulletin thread.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url = nil, options = {}) ⇒ VBulletin

Returns a new instance of VBulletin.



7
8
9
10
11
12
13
# File 'lib/scrapix/vbulletin.rb', line 7

# Builds a scraper for a single vBulletin thread.
#
# @param url     thread URL (may be nil); handed to the url= writer
# @param options [Hash] scraping options; handed to the options= writer
def initialize(url = nil, options = {})
  @images = {}

  # HTTP client that presents itself with Mechanize's 'Mac Safari' alias.
  browser = Mechanize.new
  browser.user_agent_alias = 'Mac Safari'
  @agent = browser

  # Options are assigned before the URL, keeping the original ordering.
  self.options = options
  self.url     = url
end

Instance Attribute Details

#images ⇒ Object (readonly)

Returns the value of attribute images.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @images.
#
# The constructor and #reset start it as an empty Hash; #find fills it keyed
# by the MD5 hex digest of each image URL and, once its loop completes,
# replaces it with an Array of { url: ... } hashes.
def images
  @images
end

#max_pages ⇒ Object (readonly)

Returns the value of attribute max_pages.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @max_pages.
#
# #thread_has_ended? treats this value as an upper bound on the page number.
# Where it is assigned is not visible in this listing (presumably when the
# thread URL is set -- TODO confirm).
def max_pages
  @max_pages
end

#options ⇒ Object

Returns the value of attribute options.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @options.
#
# The methods shown in this listing look up the string keys "start", "end",
# "total", "verbose" and "cli" on it.
def options
  @options
end

#page_no ⇒ Object (readonly)

Returns the value of attribute page_no.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @page_no, the page number #find will request next.
#
# #reset and #find both initialise it from @options["start"], and #find
# increments it after every fetched page.
def page_no
  @page_no
end

#title ⇒ Object (readonly)

Returns the value of attribute title.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @title.
#
# None of the methods in this listing assign it; presumably it holds the
# thread's title -- TODO confirm where it is set.
def title
  @title
end

#url ⇒ Object

Returns the value of attribute url.



5
6
7
# File 'lib/scrapix/vbulletin.rb', line 5

# Reader for @url, the thread URL.
#
# #find returns early when it is nil and otherwise appends "&page=<n>" to it
# to build each page request.
def url
  @url
end

Instance Method Details

#filter_images(sources) ⇒ Object



41
42
43
44
# File 'lib/scrapix/vbulletin.rb', line 41

# Extension point: subclasses can override this to narrow down the image
# URLs found on a page. The base implementation hands +sources+ back
# untouched (the very same object, not a copy).
#
# @param sources the image URLs found on a page
# @return the same +sources+ object
def filter_images(sources)
  sources
end

#find ⇒ Object

Finds the images in this thread, beginning at the configured start page (the "start" option).



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/scrapix/vbulletin.rb', line 16

# Collects image URLs from the thread, page by page, starting at
# @options["start"].
#
# Pages are fetched until more than @options["total"] distinct images are
# held or #thread_has_ended? reports true. Every URL is keyed by its MD5 hex
# digest while collecting, so a URL seen on several pages is stored once.
#
# @return [Array<Hash>] one { url: String } entry per distinct image, in
#   discovery order; when no URL is set, the empty collection left by #reset
def find
  reset
  return @images unless @url

  @page_no = @options["start"]
  # NOTE(review): `>` keeps looping until the count *exceeds* "total";
  # confirm whether `>=` was intended.
  until @images.count > @options["total"] || thread_has_ended?
    page_url = "#{@url}&page=#{@page_no}"
    page     = @agent.get page_url
    puts "[VERBOSE] Searching: #{page_url}" if @options["verbose"] && @options["cli"]
    sources  = filter_images(page.image_urls.map(&:to_s)) # hook for sub-classes
    @page_no += 1
    # Skip pages that yielded nothing. Ruby's loop-skip keyword is `next`;
    # the previous `continue` raised NameError on the first empty page.
    next if sources.empty?

    sources.each do |source|
      hash = Digest::MD5.hexdigest(source)
      next if @images.key?(hash)

      @images[hash] = { url: source }
      puts source if @options["cli"]
    end
  end
  @images = @images.values
end

#reset ⇒ Object



60
61
62
63
# File 'lib/scrapix/vbulletin.rb', line 60

# Discards any images collected so far and rewinds the page counter to the
# page given by @options["start"].
#
# @return the start page that was assigned to @page_no
def reset
  @images = Hash.new
  first_page = @options["start"]
  @page_no = first_page
end

#thread_has_ended? ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
# File 'lib/scrapix/vbulletin.rb', line 37

# Tells whether the current page number lies beyond the range to scan:
# past the "end" option, or past @max_pages.
#
# @return [Boolean] true once @page_no exceeds either limit
def thread_has_ended?
  # Checked first so @max_pages is only consulted when still within "end".
  return true if @page_no > @options["end"]

  @page_no > @max_pages
end