Module: OmniScrapper
Defined in:
lib/omni_scrapper/page.rb,
lib/omni_scrapper/result.rb,
lib/omni_scrapper/schema.rb,
lib/omni_scrapper/scrapper.rb,
lib/omni_scrapper/file_utils.rb,
lib/omni_scrapper/normalizers.rb,
lib/omni_scrapper/configuration.rb,
lib/omni_scrapper/normalizers/base.rb,
lib/omni_scrapper/scrapper_builder.rb,
lib/omni_scrapper/normalizers/phone.rb,
lib/omni_scrapper/exceptions/unknown_framework_exception.rb,
lib/omni_scrapper/exceptions/crawler_not_defined_exception.rb,
lib/omni_scrapper/exceptions/unsupported_framework_exception.rb,
lib/omni_scrapper/exceptions/invalid_field_arguments_exception.rb
Defined Under Namespace
Modules: ClassMethods, FileUtils, Normalizers
Classes: Configuration, CrawlerNotDefinedException, InvalidFieldArgumentsException, Page, Result, Schema, ScrapperBuilder, UnknownFrameworkException, UnsupportedFrameworkException
Class Method Summary
collapse
Instance Method Summary
collapse
Class Method Details
.included(base) ⇒ Object
2
3
4
|
# File 'lib/omni_scrapper/scrapper.rb', line 2
# Inclusion hook: extends the including class/module with the
# class-level API defined in OmniScrapper::ClassMethods.
#
# @param base [Module] the class or module that included this module
# @return [Module] +base+, as returned by +extend+
def self.included(base)
  class_level_api = OmniScrapper::ClassMethods
  base.extend(class_level_api)
end
|
.scrappers ⇒ Object
13
14
15
|
# File 'lib/omni_scrapper/scrapper.rb', line 13
# Collects every Class currently present in ObjectSpace that is a strict
# descendant of the receiver (i.e. +klass < self+ is truthy).
#
# @return [Array<Class>] matching classes, in ObjectSpace iteration order
def scrappers
  matches = []
  ObjectSpace.each_object(Class) do |candidate|
    # `<` yields nil for unrelated classes and false for the receiver itself;
    # both are skipped.
    matches << candidate if candidate < self
  end
  matches
end
|
.setup(scrapper_name) {|config| ... } ⇒ Object
Instance Method Details
#initialize(entrypoint_url = nil) ⇒ Object
24
25
26
27
28
29
|
# File 'lib/omni_scrapper/scrapper.rb', line 24
# Sets up the scrapper's entry point and its HTTP agent.
#
# The entry point is +entrypoint_url+ when given; otherwise it falls back
# to the value returned by +entrypoint_pattern+. The agent is a new
# Mechanize instance whose user-agent alias is set to 'Mac Safari'.
#
# @param entrypoint_url [String, nil] optional URL overriding the default
def initialize(entrypoint_url = nil)
  self.entrypoint = entrypoint_url || entrypoint_pattern
  self.agent = Mechanize.new { |browser| browser.user_agent_alias = 'Mac Safari' }
end
|
#scrap_page(page) ⇒ Object
31
32
33
34
|
# File 'lib/omni_scrapper/scrapper.rb', line 31
# Extracts data from +page+ and wraps it in a result object.
#
# The data comes from a new +scrapper_page_class+ instance built with the
# page and the current +configuration+; it is then fed to
# OmniScrapper::Result#build on a result created with +name+.
#
# @param page [Object] the page handed to +scrapper_page_class.new+
# @return [OmniScrapper::Result] the result after +build+ has been called
def scrap_page(page)
  extracted = scrapper_page_class.new(page, configuration).data
  result = OmniScrapper::Result.new(name)
  result.build(extracted)
  result
end
|