Class: MysqlTruck::Loader

Inherits:

Object

Object
MysqlTruck::Loader

show all

Includes: FileUtils, Helper

Defined in: lib/mysql_truck/loader.rb

Instance Method Summary collapse

#backups ⇒ Object

Get a list of backups stored on S3.
#download_file(key) ⇒ Object
#execute_sql_file(table, backup_date_str, file_path) ⇒ Object
#import_csv_file(table, backup_date_str, file_path) ⇒ Object
#import_tsv_file(table, backup_date_str, file_path) ⇒ Object
#initialize(config) ⇒ Loader constructor

A new instance of Loader.
#load_backup ⇒ Object
#only_tables ⇒ Object

Only import these tables (schema and data).
#should_download_file?(filename) ⇒ Boolean
#skip_data_for_tables ⇒ Object

Only import the schema (no data) for these tables.
#smartly? ⇒ Boolean

Methods included from Helper

#base_path, #benchmark, #bucket_dir, #config, #csv_options, #db_connection_options, #formatted_time, #initialize_directories, #initialize_s3, #local_host?, #remote_host?, #remove_directories, #s3_path, #tmp_path

Constructor Details

#initialize(config) ⇒ `Loader`

Returns a new instance of Loader.

# File 'lib/mysql_truck/loader.rb', line 6

# Stores the configuration and sets up S3 access via initialize_s3.
#
# @param config [Object] loader settings; elsewhere in this class it is read
#   with symbol keys such as :backup_date, :date_suffix and :smartly
def initialize(config)
  @config = config
  initialize_s3
end

Instance Method Details

#backups ⇒ `Object`

Get a list of backups stored on S3.

Returns an array of s3 paths that look like:

mysql/YYYY-MM-DD

Array elements are sorted with the latest date first.

# File 'lib/mysql_truck/loader.rb', line 232

# Lists the backup prefixes found under bucket_dir on S3.
#
# The listing is fetched once and memoized in @backups; later calls return
# the cached array.
#
# @return [Array] common prefixes reported by S3, sorted in descending order
#   (so the latest date comes first)
def backups
  return @backups if @backups

  @backups = []
  # Backups are stored in the mysql/ directory
  listing_options = { :prefix => "#{bucket_dir}", :delimiter => "/" }
  @bucket.s3.interface.incrementally_list_bucket(@bucket.name, listing_options) do |page|
    @backups.concat(page[:common_prefixes])
  end

  @backups = @backups.sort.reverse
end

#download_file(key) ⇒ `Object`

# File 'lib/mysql_truck/loader.rb', line 165

# Downloads one S3 key into tmp_path unless it is filtered out or, in smart
# mode, its inflated form is already present.
#
# @param key [Object] S3 key; must respond to #name and #to_s
# @return [String, nil] the basename of the key, or nil when
#   should_download_file? rejects it
def download_file(key)
  filename = File.basename(key.name)
  return unless should_download_file?(filename)

  puts "\n#{key}"

  file = tmp_path.join(filename)

  # Name the file has once inflated. Both .gz and .lzo archives are inflated
  # by load_backup, so both extensions must be stripped here; otherwise smart
  # mode would never recognise an already-inflated .lzo download.
  extension = file.extname
  inflated_name = [".gz", ".lzo"].include?(extension) ? file.basename(extension) : file.basename
  unzipped_file = tmp_path.join(inflated_name)

  if smartly? && unzipped_file.exist?
    puts " already downloaded."
  else
    print " - Downloading... "

    benchmark do
      file.open("wb") do |f|
        @bucket.s3.interface.get(@bucket.name, key.name) do |chunk|
          f.write chunk
        end
      end
    end
  end

  filename
end

#execute_sql_file(table, backup_date_str, file_path) ⇒ `Object`

# File 'lib/mysql_truck/loader.rb', line 124

# Feeds a SQL file to the mysql client through a shell pipeline.
#
# When config[:date_suffix] is set, two sed stages rewrite references to the
# table (backtick-quoted and "TABLE <name>") so they carry the backup date,
# with dashes removed, as a suffix.
#
# @param table [String] table name as it appears in the SQL file
# @param backup_date_str [String] backup date; dashes are removed for the suffix
# @param file_path [#to_s] SQL file to execute
# @return [Object] whatever benchmark returns
def execute_sql_file(table, backup_date_str, file_path)
  pipeline = ["cat #{file_path}"]

  if config[:date_suffix]
    renamed = "#{table}_#{backup_date_str.gsub(/-/, "")}"
    pipeline << "sed 's/`#{table}`/`#{renamed}`/g'"
    pipeline << "sed 's/TABLE #{table}/TABLE #{renamed}/g'"
  end

  pipeline << "mysql #{db_connection_options}"

  benchmark { `#{pipeline.join(' | ')}` }
end

#import_csv_file(table, backup_date_str, file_path) ⇒ `Object`

# File 'lib/mysql_truck/loader.rb', line 135

# Imports a CSV data file with mysqlimport.
#
# When config[:date_suffix] is set, the file is first renamed with `mv` so
# that its name carries the backup date (dashes removed) after the table name.
#
# @param table [String] table name, used to locate "/<table>." in the path
# @param backup_date_str [String] backup date used for the suffix
# @param file_path [#to_s] CSV file to import
# @return [Object] the path that was imported: the renamed path as a String
#   when a date suffix is applied, otherwise file_path unchanged
def import_csv_file(table, backup_date_str, file_path)
  target = file_path

  if config[:date_suffix]
    stamp = backup_date_str.gsub(/-/, "")
    target = file_path.to_s.gsub("/#{table}.", "/#{table}_#{stamp}.")
    # move file
    `mv #{file_path} #{target}`
  end

  benchmark { `mysqlimport --local --compress #{csv_options} #{db_connection_options} #{target}` }

  target
end

#import_tsv_file(table, backup_date_str, file_path) ⇒ `Object`

# File 'lib/mysql_truck/loader.rb', line 150

# Imports a TSV data file with mysqlimport.
#
# When config[:date_suffix] is set, the file is first renamed with `mv`:
# the "/<table>.data." part of the path becomes "/<table>_<date>." with the
# dashes removed from the date.
#
# @param table [String] table name, used to locate "/<table>.data." in the path
# @param backup_date_str [String] backup date used for the suffix
# @param file_path [#to_s] TSV file to import
# @return [Object] the path that was imported: the renamed path as a String
#   when a date suffix is applied, otherwise file_path unchanged
def import_tsv_file(table, backup_date_str, file_path)
  target = file_path

  if config[:date_suffix]
    stamp = backup_date_str.gsub(/-/, "")
    target = file_path.to_s.gsub("/#{table}.data.", "/#{table}_#{stamp}.")
    # move file
    `mv #{file_path} #{target}`
  end

  benchmark { `mysqlimport --local --compress #{db_connection_options} #{target}` }

  target
end

#load_backup ⇒ `Object`

# File 'lib/mysql_truck/loader.rb', line 21

# Downloads a backup from S3, inflates it and loads it into MySQL one table
# at a time (schema, then data, then indices).
#
# The backup date is config[:backup_date]; when that is unset, the last path
# segment of the first entry returned by #backups is used.
#
# Ordinary errors (StandardError) are printed with their backtrace instead of
# being raised. Interrupts and exit requests are deliberately NOT rescued, so
# Ctrl-C still aborts a running restore.
#
# @return [nil]
def load_backup
  # Default to latest backup
  backup_date_str = config[:backup_date]
  unless backup_date_str
    latest_backup = backups.first
    # Fail with a readable message rather than a NoMethodError on nil.
    raise "No backups found on S3" unless latest_backup
    backup_date_str = latest_backup.split("/").last
  end

  # Set directory where backup is downloaded to
  @time = Time.new(*backup_date_str.split("-"))
  initialize_directories

  puts "Download & decompressing backups"
  puts "-------------------"

  @bucket.keys(:prefix => s3_path).each do |key|
    next if key.to_s.match(/\/$/)
    next unless (filename = download_file(key))

    # download_file may skip the transfer (smart mode), in which case there
    # is no archive on disk to inflate.
    if tmp_path.join(filename).exist?
      print " - Inflating #{filename} ... "

      # Compare the extension literally; as a pattern, ".lzo" would let the
      # dot match any character.
      if File.extname(filename) == ".lzo"
        decompress_cmd = "lzop -d -f -U"
      else
        decompress_cmd = "gunzip -f"
      end
      benchmark { `#{decompress_cmd} #{tmp_path.join(filename)}` }
    end
  end

  # Load data
  puts "\nLoading schema and data by table"
  puts "--------------------------------"

  files = Dir["#{tmp_path}/*"]
  # The table name is everything before the first dot of the file name.
  tables = files.map { |f| File.basename(f).gsub(/\..*$/, '') }.uniq
  total = tables.size
  count = 0

  # Find all table names and process
  tables.sort.each do |table|
    count += 1
    puts "\nProcessing #{table} (#{count}/#{total})"

    old_schema_file = tmp_path.join("#{table}.sql")
    schema_file     = tmp_path.join("#{table}.no_index.sql")
    index_file      = tmp_path.join("#{table}.indices.sql")
    data_file       = tmp_path.join("#{table}.data.sql")
    csv_data_file   = tmp_path.join("#{table}.csv")
    tsv_data_file   = tmp_path.join("#{table}.data.tsv")

    # Create table
    if schema_file.exist?
      print " - Loading schema: #{File.basename(schema_file)} ... "
      execute_sql_file(table, backup_date_str, schema_file)
      schema_file.delete

    elsif old_schema_file.exist?
      print " - Loading schema: #{File.basename(old_schema_file)} ... "
      execute_sql_file(table, backup_date_str, old_schema_file)
      old_schema_file.delete

    else
      puts "ERROR: no schema file!"
      next
    end

    # Load data
    unless config[:skip_all_data]
      if data_file.exist?
        print " - Importing #{File.basename(data_file)} ... "
        execute_sql_file(table, backup_date_str, data_file)
        data_file.delete

      elsif csv_data_file.exist?
        print " - Importing #{File.basename(csv_data_file)} ... "
        # The import may rename the file (date suffix); delete what it returns.
        csv_data_file = import_csv_file(table, backup_date_str, csv_data_file)
        File.delete(csv_data_file)

      elsif tsv_data_file.exist?
        print " - Importing #{File.basename(tsv_data_file)} ... "
        # The import may rename the file (date suffix); delete what it returns.
        tsv_data_file = import_tsv_file(table, backup_date_str, tsv_data_file)
        File.delete(tsv_data_file)
      end
    end

    # Add indices
    if index_file.exist?
      print " - Adding indices: #{File.basename(index_file)} ... "
      execute_sql_file(table, backup_date_str, index_file)
      index_file.delete
    end

  end

  puts "Backup loaded."

  # This isn't in an ensure block because we want to keep around
  # downloads if there's a failure importing a table.
  # remove_directories

rescue StandardError => e
  puts e.message
  puts e.backtrace.join("\n")
end

#only_tables ⇒ `Object`

Only import these tables (schema and data).

# File 'lib/mysql_truck/loader.rb', line 17

# Tables to restrict the import to (schema and data).
#
# @return [Object] config[:only_tables], or an empty array when it is unset
def only_tables
  selected = config[:only_tables]
  selected || []
end

#should_download_file?(filename) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/mysql_truck/loader.rb', line 193

# Decides whether a backup file should be downloaded, based on the
# only_tables, skip_data_for_tables and :skip_all_data settings.
#
# Data files are "<table>.data.sql", "<table>.data.tsv" and "<table>.csv"
# archives (.gz or .lzo); every other file is treated as schema.
#
# @param filename [String] basename of the S3 key, e.g. "users.data.sql.gz"
# @return [Boolean] true when the file should be downloaded
def should_download_file?(filename)
  # The table name is everything before the first dot, provided the name has
  # at least two dots (extension plus compression suffix).
  table_name = filename.gsub(/\..*\..*$/, '')

  # If we're targeting specific tables, then we always want both
  # schema and data files for them, and nothing else.
  return only_tables.include?(table_name) unless only_tables.empty?

  # Schema files are always needed. CSV archives count as data here because
  # load_backup imports "<table>.csv" as table data.
  is_data = filename.match(/(\.data\.(sql|tsv)|\.csv)\.(lzo|gz)$/)
  return true unless is_data

  return false if config[:skip_all_data]

  !skip_data_for_tables.include?(table_name)
end

#skip_data_for_tables ⇒ `Object`

Only import the schema (no data) for these tables.

# File 'lib/mysql_truck/loader.rb', line 12

# Tables for which only the schema is imported.
#
# @return [Object] config[:skip_data_for_tables], or an empty array when unset
def skip_data_for_tables
  schema_only = config[:skip_data_for_tables]
  schema_only || []
end

#smartly? ⇒ `Boolean`

Returns:

(Boolean) — the value of the `:smartly` option in the config.

# File 'lib/mysql_truck/loader.rb', line 247

# Reports the :smartly setting. download_file consults it to skip a download
# when the inflated file is already present in tmp_path.
#
# @return [Object] config[:smartly] as stored (truthy when smart mode is on)
def smartly?
  config[:smartly]
end

Class: MysqlTruck::Loader

Instance Method Summary collapse

Methods included from Helper

Constructor Details

#initialize(config) ⇒ Loader

Instance Method Details

#backups ⇒ Object

#download_file(key) ⇒ Object

#execute_sql_file(table, backup_date_str, file_path) ⇒ Object

#import_csv_file(table, backup_date_str, file_path) ⇒ Object

#import_tsv_file(table, backup_date_str, file_path) ⇒ Object

#load_backup ⇒ Object

#only_tables ⇒ Object

#should_download_file?(filename) ⇒ Boolean

#skip_data_for_tables ⇒ Object

#smartly? ⇒ Boolean