Class: ESearchy::OtherEngines::Spider

Inherits:
GenericEngine show all
Defined in:
lib/esearchy/OtherEngines/spider.rb

Constant Summary collapse

ENGINE =

Do not really need any of them.

""
PORT =
0
NUM =
0
TYPE =
1

Instance Attribute Summary

Attributes inherited from GenericEngine

#documents, #emails, #people, #results

Instance Method Summary collapse

Methods inherited from GenericEngine

#company=, #initialize, #maxhits=, #start=

Constructor Details

This class inherits a constructor from ESearchy::GenericEngine

Instance Method Details

#parse(html) ⇒ Object



34
35
36
37
38
39
# File 'lib/esearchy/OtherEngines/spider.rb', line 34

# Collects the href targets found in +html+ and passes them to the inherited
# +parse+.
#
# Each entry handed to +super+ is a one-element array (the shape produced by
# String#scan with a single capture group). Links that already start with an
# http, https or ftp scheme are passed through untouched; every other link is
# treated as site-relative and prefixed with #website.
#
# html - String containing the markup to scan.
#
# Returns whatever the inherited +parse+ returns.
def parse( html )
  links = html.scan(/href=["']([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)["']/).map do |captures|
    href = captures[0]
    # Anchor the scheme test at the start of the link: a relative link that
    # merely carries a URL in its query string must still be prefixed.
    if href =~ %r{\A(?:https?|ftp)://}i
      captures
    else
      # Join with exactly one slash so "page.html" does not get glued onto
      # the host name and "/page" does not produce a double slash.
      [website().chomp("/") + "/" + href.sub(%r{\A/}, "")]
    end
  end
  super links
end

#search ⇒ Object



12
13
14
15
16
17
18
19
20
# File 'lib/esearchy/OtherEngines/spider.rb', line 12

# Crawls the configured website with Spidr. For every page visited, the page
# URL is passed to +D+, then the page body is passed to +crawler+ and to
# +parse+, in that order.
#
# Returns the value of the Spidr.site call.
def search
  Spidr.site(website()) do |agent|
    agent.every_page do |visited|
      D(visited.url)
      crawler(visited.body)
      parse(visited.body)
    end
  end
end

#website ⇒ Object



22
23
24
25
26
27
28
# File 'lib/esearchy/OtherEngines/spider.rb', line 22

# Returns the website to work on: the value of ESearchy::Search.website when
# it is set, otherwise the value stored through #website=.
#
# Raises ESearchyMissingWebsite when the lookup fails or when neither source
# provides a value, so callers never receive nil.
def website
  url = begin
    ESearchy::Search.website || @website
  rescue StandardError
    nil
  end
  # A nil or empty value would only surface later as an unrelated
  # NoMethodError, so report the missing configuration here.
  if url.nil? || url.to_s.empty?
    raise ESearchyMissingWebsite, "Missing website url Object.website=(value)"
  end
  url
end

#website=(v) ⇒ Object



30
31
32
# File 'lib/esearchy/OtherEngines/spider.rb', line 30

# Stores +v+ in @website, which #website reads as its fallback value.
#
# v - the website value to remember.
def website=(v)
  @website = v
end