Class: RegexpCrawler::Crawler
- Inherits:
-
Object
- Object
- RegexpCrawler::Crawler
- Defined in:
- lib/regexp_crawler/crawler.rb
Instance Attribute Summary collapse
-
#continue_regexp ⇒ Object
Returns the value of attribute continue_regexp.
-
#encoding ⇒ Object
Returns the value of attribute encoding.
-
#headers ⇒ Object
Returns the value of attribute headers.
-
#model ⇒ Object
Returns the value of attribute model.
-
#named_captures ⇒ Object
Returns the value of attribute named_captures.
-
#need_parse ⇒ Object
Returns the value of attribute need_parse.
-
#save_method ⇒ Object
Returns the value of attribute save_method.
-
#start_page ⇒ Object
Returns the value of attribute start_page.
Instance Method Summary collapse
- #capture_regexp=(regexp) ⇒ Object
-
#initialize(options = {}) ⇒ Crawler
constructor
A new instance of Crawler.
- #logger=(logger) ⇒ Object
- #start ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Crawler
Returns a new instance of Crawler.
5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/regexp_crawler/crawler.rb', line 5 def initialize(options = {}) @start_page = options[:start_page] @continue_regexp = options[:continue_regexp] @capture_regexp = options[:capture_regexp] @named_captures = options[:named_captures] @model = options[:model] @save_method = options[:save_method] @headers = options[:headers] @encoding = options[:encoding] @need_parse = options[:need_parse] @logger = options[:logger] == true ? Logger.new(STDOUT) : options[:logger] end |
Instance Attribute Details
#continue_regexp ⇒ Object
Returns the value of attribute continue_regexp.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def continue_regexp @continue_regexp end |
#encoding ⇒ Object
Returns the value of attribute encoding.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def encoding @encoding end |
#headers ⇒ Object
Returns the value of attribute headers.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def headers @headers end |
#model ⇒ Object
Returns the value of attribute model.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def model @model end |
#named_captures ⇒ Object
Returns the value of attribute named_captures.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def named_captures @named_captures end |
#need_parse ⇒ Object
Returns the value of attribute need_parse.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def need_parse @need_parse end |
#save_method ⇒ Object
Returns the value of attribute save_method.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def save_method @save_method end |
#start_page ⇒ Object
Returns the value of attribute start_page.
3 4 5 |
# File 'lib/regexp_crawler/crawler.rb', line 3 def start_page @start_page end |
Instance Method Details
#capture_regexp=(regexp) ⇒ Object
18 19 20 |
# File 'lib/regexp_crawler/crawler.rb', line 18 def capture_regexp=(regexp) @capture_regexp = Regexp.new(regexp.source, regexp.options | Regexp::MULTILINE) end |
#logger=(logger) ⇒ Object
22 23 24 |
# File 'lib/regexp_crawler/crawler.rb', line 22 def logger=(logger) @logger = logger == true ? Logger.new(STDOUT) : logger end |
#start ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/regexp_crawler/crawler.rb', line 26 def start @results = [] @captured_pages = [] @pages = [URI.parse(@start_page)] while !@pages.empty? and !@stop uri = @pages.shift @captured_pages << uri parse_page(uri) end @results end |