Class: Tango::App
- Inherits:
-
Object
- Object
- Tango::App
- Defined in:
- lib/tango/app.rb
Overview
Tango application
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#dispatcher ⇒ Object
readonly
Returns the value of attribute dispatcher.
-
#link_stack ⇒ Object
readonly
Returns the value of attribute link_stack.
-
#logger ⇒ Object
readonly
Returns the value of attribute logger.
Instance Method Summary collapse
-
#after ⇒ Object
Filter run after Tango execution.
-
#before ⇒ Object
Filter run before Tango execution.
- #initialize(config: {}, link_stack: nil, dispatcher: nil, cache: nil, http_client: nil, parser: nil, db_locker: nil, logger: nil) ⇒ Tango::App constructor
-
#register_handler(handler) ⇒ Array
Register a new handler with the dispatcher.
-
#register_model(model) ⇒ Array
Register a new model.
-
#register_operator(operator) ⇒ Array
Register a new resource operator.
-
#run ⇒ Nil
Run ETL process.
Constructor Details
#initialize(config: {}, link_stack: nil, dispatcher: nil, cache: nil, http_client: nil, parser: nil, db_locker: nil, logger: nil) ⇒ Tango::App
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/tango/app.rb', line 20

# Build a Tango application and wire in its collaborators.
#
# Every collaborator can be injected through a keyword argument; any that is
# left nil (or false) is replaced with the default constructed below.
#
# @param config [Hash] application settings; 'target_url' is read here when
#   the default link stack has to be built
# @param link_stack [Object, nil] defaults to LinkStack.new( config['target_url'] )
# @param dispatcher [Object, nil] defaults to ETL::Dispatcher.new
# @param cache [Object, nil] defaults to a Resource::Cache wrapping a new Resource::Buffer
# @param http_client [Object, nil] defaults to HTTParty
# @param parser [Object, nil] defaults to Nokogiri::HTML
# @param db_locker [Object, nil] defaults to DatabaseLocker.new( Multidb.databases )
# @param logger [Object, nil] defaults to a Logger writing to STDOUT
def initialize(
  config: {},
  link_stack: nil,
  dispatcher: nil,
  cache: nil,
  http_client: nil,
  parser: nil,
  db_locker: nil,
  logger: nil
)
  # Set config
  @config = config

  # Registries are arrays because #register_model and #register_operator
  # append to them with <<. They were previously also initialised as hashes
  # and then overwritten; only the array form was ever in effect.
  @models    = []
  @operators = []

  # Set dependencies
  @link_stack  = link_stack  || LinkStack.new( config['target_url'] )
  @dispatcher  = dispatcher  || ETL::Dispatcher.new
  @cache       = cache       || Resource::Cache.new( Resource::Buffer.new )
  @http_client = http_client || HTTParty
  @parser      = parser      || Nokogiri::HTML
  @db_locker   = db_locker   || DatabaseLocker.new( Multidb.databases )
  @logger      = logger      || Logger.new( STDOUT )
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
10 11 12 |
# File 'lib/tango/app.rb', line 10 def config @config end |
#dispatcher ⇒ Object (readonly)
Returns the value of attribute dispatcher.
10 11 12 |
# File 'lib/tango/app.rb', line 10 def dispatcher @dispatcher end |
#link_stack ⇒ Object (readonly)
Returns the value of attribute link_stack.
10 11 12 |
# File 'lib/tango/app.rb', line 10 def link_stack @link_stack end |
#logger ⇒ Object (readonly)
Returns the value of attribute logger.
10 11 12 |
# File 'lib/tango/app.rb', line 10 def logger @logger end |
Instance Method Details
#after ⇒ Object
Filter run after Tango execution
48 49 |
# File 'lib/tango/app.rb', line 48 def after end |
#before ⇒ Object
Filter run before Tango execution
44 45 |
# File 'lib/tango/app.rb', line 44 def before end |
#register_handler(handler) ⇒ Array
Register a new handler with the dispatcher
71 72 73 |
# File 'lib/tango/app.rb', line 71 def register_handler( handler ) @dispatcher.register( handler ) end |
#register_model(model) ⇒ Array
Register a new model
55 56 57 |
# File 'lib/tango/app.rb', line 55

# Add a model to this application's model registry.
#
# @param model [Object] the model to register
# @return [Array] the registry (@models) with the model appended
def register_model( model )
  @models.push( model )
end
#register_operator(operator) ⇒ Array
Register a new resource operator
63 64 65 |
# File 'lib/tango/app.rb', line 63

# Add a resource operator to this application's operator registry.
#
# @param operator [Object] the resource operator to register
# @return [Array] the registry (@operators) with the operator appended
def register_operator( operator )
  @operators.push( operator )
end
#run ⇒ Nil
Run ETL process
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/tango/app.rb', line 84

# Run the ETL process.
#
# Picks the database reported by the locker, truncates the non-persistent
# models and loads the cache, then drains the link stack: every link is
# matched to a handler, downloaded, parsed and handed to that handler. Once
# the stack is empty the buffers are released, the database is locked, a
# summary is logged and the logger is closed.
#
# A link that has no handler, cannot be downloaded, or answers with a status
# other than 200/201 is logged and skipped. A handler that raises a
# StandardError is logged and simply not counted. None of these abort the run.
#
# @return [Object] whatever @logger.close returns (the generated docs list Nil)
def run
  # Save beginning time
  start_time = Time.now

  @logger.info "Running Tango v.#{Tango::VERSION} ..."
  @logger.info "Target: #{@link_stack.host}."

  # Use next unlocked database
  # NOTE(review): @db_locker.unlocked is queried again for the log line and for
  # the final lock; this assumes it keeps returning the same database for the
  # whole run - confirm.
  pick_database( @db_locker.unlocked )
  @logger.info "Using database '#{@db_locker.unlocked}'."

  @logger.info "Truncating non persistent models ..."
  truncate_tables( non_persistent_models )

  # Load cache for persistent models
  @logger.info "Loading cache ..."
  setup_cache( @operators )
  load_cache( persistent_models )

  # Run before filter
  @logger.info "Running before callback ..."
  before

  # Init counter of crawled links
  links_counter = 0

  @logger.info "Tango starts crawling ..."

  # Start crawling website
  while @link_stack.has_links?
    # Get a link from the stack
    link = @link_stack.shift

    # Skip iteration if no handler found
    handler_klass = @dispatcher.find_handler( link )
    unless handler_klass
      @logger.error "No handler for link: #{link}."
      next
    end

    # Try to get contents of the link
    url = @link_stack.host + link
    begin
      response = @http_client.get( url )
    rescue StandardError => e
      @logger.error "Could not download contents of #{url} link."
      @logger.error e.message
      next
    end

    # Continue only when response has code 200 or 201
    unless [ 200, 201 ].include?( response.code )
      @logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
      next
    end

    # Parse response contents
    document = @parser.parse( response.body )

    # Init handler
    handler = handler_klass.new( link, document, @cache )

    # Append links fetched from handler (done before the handler is fired, so
    # they are queued even when the handler fails)
    @link_stack.append( handler.links )

    # Try to fire the handler
    begin
      handler.trigger
    rescue StandardError => e
      # Log error
      @logger.error "Link: #{link}. Handler had some troubles."
      @logger.error e.message
      @logger.error e.backtrace.join( "\n" )
    else
      links_counter += 1
      @logger.debug "Link: #{link}. Handler triggered successfully."
    end

    # Sleep to give crawled server time to breathe. Links skipped above with
    # `next` never reach this point, so they do not sleep.
    sleep( @config["sleep"] || 0 )
  end

  # Release buffers
  @logger.info "Releasing buffers ..."
  release_buffer( @cache.buffer )

  # Run after filter
  @logger.info "Running after callback ..."
  after

  # Lock database used in this Tango iteration
  lock_database( @db_locker.unlocked )

  # Get time of script execution ending
  end_time = Time.now

  @logger.info "Tango crawled #{links_counter}/#{@link_stack.shifted} links successfully."
  @logger.info "Start time: #{start_time}, end time: #{end_time}, time elapsed: #{end_time - start_time} seconds."

  # Close logger
  @logger.close
end