Class: Mortar::Local::Pig

Inherits:
Object
  • Object
show all
Includes:
Helpers, InstallUtil, Params
Defined in:
lib/mortar/local/pig.rb

Constant Summary collapse

PIG_LOG_FORMAT =
"humanreadable"
LIB_TGZ_NAME =
"lib-common.tar.gz"
PIG_COMMON_LIB_URL_PATH =
"resource/lib_common"
DEFAULT_PIGOPTS_FILES =

This needs to be defined for watchtower.

%w(
    /lib-common/conf/pig-hawk-global.properties
    /lib-common/conf/pig-cli-local-dev.properties
)

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Helpers

#action, #ask, #confirm, #copy_if_not_present_at_dest, #default_host, #deprecate, #display, #display_header, #display_object, #display_row, #display_table, #display_with_indent, #download_to_file, #ensure_dir_exists, #error, error_with_failure, error_with_failure=, extended, extended_into, #format_bytes, #format_date, #format_with_bang, #full_host, #get_terminal_environment, #home_directory, #host, #hprint, #hputs, included, included_into, #installed_with_omnibus?, #json_decode, #json_encode, #line_formatter, #longest, #output_with_bang, #pending_github_team_state_message, #quantify, #redisplay, #retry_on_exception, #running_on_a_mac?, #running_on_windows?, #set_buffer, #shell, #spinner, #status, #string_distance, #styled_array, #styled_error, #styled_hash, #styled_header, #suggestion, #test_name, #ticking, #time_ago, #truncate, #warning, #with_tty, #write_to_file

Methods included from Params

#automatic_parameters, #merge_parameters

Methods included from InstallUtil

#download_file, #ensure_mortar_local_directory, #extract_tgz, #get_resource, #gitignore_template_path, #head_resource, #http_date_to_epoch, #install_date, #install_file_for, #is_newer_version, #jython_cache_directory, #jython_directory, #local_install_directory, #local_install_directory_name, #local_log_dir, #local_project_gitignore, #local_udf_log_dir, #make_call, #make_call_sleep_seconds, #note_install, #osx?, #project_root, #render_script_template, #reset_local_logs, #run_templated_script, #unset_hadoop_env_vars, #url_date

Constructor Details

#initializePig

Returns a new instance of Pig.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/mortar/local/pig.rb', line 56

# Sets up per-instance state: an empty list used to keep Tempfile objects
# referenced, and two lookup tables keyed by resource name. The first maps
# each bundled illustrate asset to an absolute path resolved relative to
# this source file; the second maps each asset to the relative path used
# for the generated illustrate output.
def initialize
  @temp_file_objects = []

  # Asset paths relative to this file; expanded to absolute paths below.
  bundled_assets = {
    "illustrate_template" => "../../templates/report/illustrate-report.html",
    "illustrate_css" => "../../../../css/illustrate.css",
    "jquery" => "../../../../js/jquery-1.7.1.min.js",
    "jquery_transit" => "../../../../js/jquery.transit.js",
    "jquery_stylestack" => "../../../../js/jquery.stylestack.js",
    "mortar_table" => "../../../../js/mortar-table.js",
    "zeroclipboard" => "../../../../js/zero_clipboard.js",
    "zeroclipboard_swf" => "../../../../flash/zeroclipboard.swf"
  }
  @resource_locations = {}
  bundled_assets.each do |name, relative_path|
    @resource_locations[name] = File.expand_path(relative_path, __FILE__)
  end

  @resource_destinations = {
    "illustrate_html"   => "illustrate-output/illustrate-output.html",
    "illustrate_css"    => "illustrate-output/resources/css/illustrate-output.css",
    "jquery"            => "illustrate-output/resources/js/jquery-1.7.1.min.js",
    "jquery_transit"    => "illustrate-output/resources/js/jquery.transit.js",
    "jquery_stylestack" => "illustrate-output/resources/js/jquery.stylestack.js",
    "mortar_table"      => "illustrate-output/resources/js/mortar-table.js",
    "zeroclipboard"     => "illustrate-output/resources/js/zero_clipboard.js",
    "zeroclipboard_swf" => "illustrate-output/resources/flash/zeroclipboard.swf"
  }
end

Instance Attribute Details

#resource_destinationsObject

Returns the value of attribute resource_destinations.



54
55
56
# File 'lib/mortar/local/pig.rb', line 54

# Reader for the table, built in the constructor, that maps each resource
# name to the relative path it is written to.
def resource_destinations
  instance_variable_get(:@resource_destinations)
end

#resource_locationsObject

Returns the value of attribute resource_locations.



53
54
55
# File 'lib/mortar/local/pig.rb', line 53

# Reader for the table, built in the constructor, that maps each resource
# name to the absolute path of the bundled source file.
def resource_locations
  instance_variable_get(:@resource_locations)
end

Instance Method Details

#automatic_pig_parametersObject



423
424
425
426
# File 'lib/mortar/local/pig.rb', line 423

# Deprecated alias: emits a deprecation warning and then returns whatever
# automatic_parameters returns.
def automatic_pig_parameters
  warn("[DEPRECATION] Please call automatic_parameters instead")
  automatic_parameters()
end

#command(pig_version) ⇒ Object



82
83
84
# File 'lib/mortar/local/pig.rb', line 82

# Path of the pig launcher ("bin/pig") inside the install directory of the
# given pig version.
def command(pig_version)
  pig_home = pig_directory(pig_version)
  File.join(pig_home, "bin", "pig")
end

#create_illustrate_output_pathObject

Create a temp file to be used for writing the illustrate json output, and return its path. This data file will later be used to create the result html output. Tempfile will take care of cleaning up the file when we exit.



209
210
211
212
213
214
215
# File 'lib/mortar/local/pig.rb', line 209

# Creates an empty temp file for the illustrate output and returns its path.
#
# The Tempfile object is stored in @temp_file_objects so that it stays
# referenced: an unreferenced Tempfile may be garbage collected, which
# unlinks the file while its path is still in use. Tempfile removes the
# file on process exit.
def create_illustrate_output_path
  outfile = Tempfile.new("mortar-illustrate-output")
  # Close the handle but keep the file on disk.
  outfile.close(false)

  # Tolerates an instance whose constructor did not set up the list.
  (@temp_file_objects ||= []) << outfile

  outfile.path
end

#create_illustrate_template_parameters(illustrate_data) ⇒ Object



265
266
267
268
269
270
# File 'lib/mortar/local/pig.rb', line 265

# Picks the 'tables' and 'udf_output' entries out of the decoded illustrate
# data and returns them in a new hash. Keys missing from the input map to nil.
def create_illustrate_template_parameters(illustrate_data)
  {
    'tables' => illustrate_data['tables'],
    'udf_output' => illustrate_data['udf_output']
  }
end

#decode_illustrate_input_file(illustrate_outpath) ⇒ Object

Given a file path, open the file and decode the text it contains



218
219
220
221
222
223
224
225
226
227
# File 'lib/mortar/local/pig.rb', line 218

# Reads the file at +illustrate_outpath+ and returns its contents as valid
# UTF-8 text. Byte sequences that are not valid UTF-8 are dropped; valid
# multi-byte characters are kept.
#
# Transcoding from 'binary' would discard every non-ASCII byte, which is
# not what the Iconv fallback below does. All branches now agree on
# "drop invalid sequences only".
def decode_illustrate_input_file(illustrate_outpath)
  data_raw = File.read(illustrate_outpath)
  if data_raw.respond_to?(:scrub)
    # Ruby >= 2.1
    data_raw.force_encoding('UTF-8').scrub('')
  elsif data_raw.respond_to?(:encode)
    # Ruby 1.9/2.0: a UTF-8 -> UTF-8 transcode is a no-op there, so round
    # trip through UTF-16BE to force validation.
    data_raw.encode('UTF-16BE', 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '').
      encode('UTF-8', 'UTF-16BE')
  else
    # Ruby 1.8: no String#encode.
    require 'iconv'
    Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(data_raw)
  end
end

#illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters) ⇒ Object



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/mortar/local/pig.rb', line 272

# Builds a pig "-e 'illustrate ...'" sub-command for the given script and
# runs it. When the run reports success, the output is either displayed as
# text (no_browser) or rendered and opened in a browser.
#
# Returns nil when the pig command reports failure, otherwise the result of
# the display / browser step.
def illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters)
  # Parameters have to be entered with the illustrate command (as
  # opposed to as a command line argument) or it will result in an
  # 'Undefined parameter' error.
  param_file = make_pig_param_file(pig_parameters)
  illustrate_outpath = create_illustrate_output_path()

  pieces = ["-e 'illustrate "]
  pieces << "-param_file #{param_file} "
  # Point pig at the script to illustrate and where to dump the result.
  pieces << "-script #{pig_script.path} -out #{illustrate_outpath} "
  pieces << " -skipPruning " if skip_pruning
  # The output-format flag also carries the closing quote.
  pieces << (no_browser ? " -str '" : " -json '")
  # NOTE(review): the alias lands after the closing quote; confirm this
  # placement is intended before changing it.
  pieces << " #{pig_alias} " if pig_alias

  # Illustrate passes an empty parameter list and turns jython output off.
  result = run_pig_command(pieces.join, pig_version, [], false)
  return unless result

  if no_browser
    display decode_illustrate_input_file(illustrate_outpath)
  else
    show_illustrate_output_browser(illustrate_outpath)
  end
end

#install_libObject



165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/mortar/local/pig.rb', line 165

# Downloads the lib-common archive into the local install directory,
# extracts it there, removes the archive and records the install.
def install_lib
  # Start from a clean slate so cruft from an older install isn't left around.
  FileUtils.rm_rf(lib_directory) if File.directory?(lib_directory)

  install_root = local_install_directory
  FileUtils.mkdir_p(install_root)

  archive_path = File.join(install_root, LIB_TGZ_NAME)
  download_file(lib_archive_url, archive_path)
  extract_tgz(archive_path, install_root)
  File.delete(archive_path)

  note_install("lib-common")
end

#install_or_update(pig_version, command = nil) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/mortar/local/pig.rb', line 123

# Installs or refreshes pig and the common pig libraries as needed.
#
# For each of the two components the "install" check runs first; the
# "update" check is only consulted when no install is required.
#
# pig_version - object responding to #pig_version (used in messages).
# command     - optional value forwarded to the pig update check and to
#               install_pig.
def install_or_update(pig_version, command=nil)
  if should_do_pig_install?(pig_version)
    action "Installing #{pig_version.pig_version} to #{local_install_directory_name}" do
      install_pig(pig_version, command)
    end
  elsif should_do_pig_update?(pig_version, command)
    action "Updating to latest #{pig_version.pig_version} in #{local_install_directory_name}" do
      # Forward command here as well, so the update path behaves like the
      # install path above.
      install_pig(pig_version, command)
    end
  end

  if should_do_lib_install?
    action "Installing pig dependencies to #{local_install_directory_name}" do
      install_lib()
    end
  elsif should_do_lib_update?
    action "Updating to latest pig dependencies in #{local_install_directory_name}" do
      install_lib()
    end
  end
end

#install_pig(pig_version, command = nil) ⇒ Object

Installs pig for this project, first removing any existing install directory for that pig version



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/mortar/local/pig.rb', line 146

# Downloads the archive for the given pig version into the local install
# directory, extracts it, makes the pig launcher executable, removes the
# archive and records the install.
#
# command - optional value passed through to download_file.
def install_pig(pig_version, command=nil)
  # Start from a clean slate so cruft from an older install isn't left around.
  pig_home = pig_directory(pig_version)
  FileUtils.rm_rf(pig_home) if File.directory?(pig_home)

  install_root = local_install_directory
  FileUtils.mkdir_p(install_root)

  archive_path = File.join(install_root, pig_version.tgz_name)
  download_file(pig_archive_url(pig_version), archive_path, command)
  extract_tgz(archive_path, install_root)

  # The launcher has been seen coming out of the tgz without +x, so set
  # the permissions explicitly. With an argument list, command(...) is the
  # method call, not the local parameter of the same name.
  FileUtils.chmod(0755, command(pig_version))

  File.delete(archive_path)
  note_install(pig_version.pig_version)
end

#launch_repl(pig_version, pig_parameters) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/mortar/local/pig.rb', line 184

# Writes a "no project" copy of the log4j configuration and then starts pig
# with an empty sub-command (the REPL). Returns run_pig_command's result.
def launch_repl(pig_version, pig_parameters)
  # The REPL is very likely to be run outside a mortar project and almost equally as likely
  # to be run in the users home directory.  The default log4j config file references pig log
  # file as being ../logs/local-pig.log, which is a path relative to the 'pigscripts' directory.
  # Since we very likely aren't going be run from a mortar project we won't have a pigscripts
  # directory to cd into, so log4j spits out an ugly error message when it doesn't have permissions
  # to create /home/logs/local-pig.log. So to work around this we copy the log4j configuration and
  # overwrite the log file to no longer be relative.
  #
  # File.read closes the source file; File.open(...).read left it open.
  # The dots in the property name are escaped so only the exact key matches.
  patched_conf = File.read(log4j_conf).gsub(/log4j\.appender\.LogFileAppender\.File=.*\n/,
                                            "log4j.appender.LogFileAppender.File=local-pig.log\n")
  File.open(log4j_conf_no_project, 'w') do |out|
    out << patched_conf
  end
  run_pig_command(" ", pig_version, pig_parameters)
end

#lib_archive_urlObject



99
100
101
102
# File 'lib/mortar/local/pig.rb', line 99

# URL of the lib-common archive: the COMMON_LIB_DISTRO_URL environment
# variable when it is set, otherwise full_host joined with the lib-common
# resource path.
def lib_archive_url
  ENV.fetch('COMMON_LIB_DISTRO_URL', full_host + "/" + PIG_COMMON_LIB_URL_PATH)
end

#lib_directoryObject



90
91
92
# File 'lib/mortar/local/pig.rb', line 90

# Path of the "lib-common" directory inside the local install directory.
def lib_directory
  install_root = local_install_directory
  File.join(install_root, "lib-common")
end

#log4j_confObject



353
354
355
# File 'lib/mortar/local/pig.rb', line 353

# Path of the log4j properties file for local development, located under
# lib_directory's "conf" directory.
def log4j_conf
  [lib_directory, "conf", "log4j-cli-local-dev.properties"].join("/")
end

#log4j_conf_no_projectObject



357
358
359
# File 'lib/mortar/local/pig.rb', line 357

# Path of the log4j properties file used when running outside a project,
# located under lib_directory's "conf" directory.
def log4j_conf_no_project
  [lib_directory, "conf", "log4j-cli-local-no-project.properties"].join("/")
end

#make_pig_param_file(pig_parameters) ⇒ Object

Given a set of user specified pig parameters, combine with the automatic mortar parameters and write out to a tempfile, returning its path so it may be referenced later in the process



406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
# File 'lib/mortar/local/pig.rb', line 406

# Writes the automatic mortar parameters followed by the user supplied
# ones to a temp file, one "name=value" line each, and returns its path.
#
# pig_parameters - Array of hashes with 'name' and 'value' keys, or nil
#                  (run_pig_command defaults its parameters to nil).
def make_pig_param_file(pig_parameters)
  # Array#+ builds a new array, so the array returned by
  # automatic_parameters is left untouched; nil means "no user parameters".
  all_parameters = automatic_parameters() + (pig_parameters || [])

  param_file = Tempfile.new("mortar-pig-parameters")
  all_parameters.each do |param|
    param_file.write("#{param['name']}=#{param['value']}\n")
  end
  # Close the handle but keep the file on disk.
  param_file.close(false)

  # Keep a reference to the tempfile object so that the
  # garbage collector does not automatically delete the file
  # out from under us
  (@temp_file_objects ||= []).push(param_file)

  param_file.path
end

#pig_archive_url(pig_version) ⇒ Object



94
95
96
97
# File 'lib/mortar/local/pig.rb', line 94

# URL of the pig archive for the given version: the PIG_DISTRO_URL
# environment variable when it is set, otherwise full_host joined with the
# version's default URL path.
def pig_archive_url(pig_version)
  ENV.fetch('PIG_DISTRO_URL', full_host + "/" + pig_version.tgz_default_url_path)
end

#pig_classpath(pig_version) ⇒ Object



341
342
343
344
345
346
347
348
349
350
351
# File 'lib/mortar/local/pig.rb', line 341

# Colon-separated classpath made of the pig and lib-common jar directories,
# the jython jar and the project's lib directory, in that order of
# precedence as listed below.
def pig_classpath(pig_version)
  pig_home = pig_directory(pig_version)
  common_lib = lib_directory

  entries = [
    "#{pig_home}/lib-local/*",
    "#{common_lib}/lib-local/*",
    "#{pig_home}/lib-pig/*",
    "#{pig_home}/lib-cluster/*",
    "#{common_lib}/lib-pig/*",
    "#{common_lib}/lib-cluster/*",
    "#{jython_directory}/jython.jar",
    "#{project_root}/lib/*"
  ]
  entries.join(":")
end

#pig_command_script_template_parameters(cmd, pig_version, pig_parameters) ⇒ Object

Parameters necessary for rendering the bash script template



362
363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/mortar/local/pig.rb', line 362

# Builds the hash of values needed to render the bash script template that
# runs pig.
#
# cmd            - pig sub-command string, stored under 'pig_sub_command'.
# pig_version    - object responding to #pig_version; also passed to the
#                  directory and classpath helpers.
# pig_parameters - user parameters handed to make_pig_param_file.
def pig_command_script_template_parameters(cmd, pig_version, pig_parameters)
  template_params = {}
  template_params['pig_params_file'] = make_pig_param_file(pig_parameters)
  template_params['pig_dir'] = pig_version.pig_version
  template_params['pig_home'] = pig_directory(pig_version)
  template_params['pig_classpath'] = pig_classpath(pig_version)
  # Pass the requested version through; without it the helper falls back
  # to its built-in default version and 'classpath' would disagree with
  # 'pig_home' and 'pig_classpath' above.
  template_params['classpath'] = template_params_classpath(pig_version)
  template_params['log4j_conf'] = log4j_conf
  template_params['no_project_log4j_conf'] = log4j_conf_no_project
  template_params['pig_sub_command'] = cmd
  template_params['pig_opts'] = pig_options
  template_params
end

#pig_command_script_template_pathObject

Path to the template which generates the bash script for running pig



319
320
321
# File 'lib/mortar/local/pig.rb', line 319

# Absolute path of the runpig.sh template, resolved relative to this
# source file.
def pig_command_script_template_path
  relative_template = "../../templates/script/runpig.sh"
  File.expand_path(relative_template, __FILE__)
end

#pig_directory(pig_version) ⇒ Object



86
87
88
# File 'lib/mortar/local/pig.rb', line 86

# Directory, inside the local install directory, named after the given
# pig version.
def pig_directory(pig_version)
  install_root = local_install_directory
  File.join(install_root, pig_version.pig_version)
end

#pig_optionsObject

Returns a hash of settings that need to be passed in via pig options



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/mortar/local/pig.rb', line 378

# Returns a new hash of settings to be passed to pig as options: AWS
# credentials read from the environment, logging locations, jython
# settings and Kerberos properties. On OS X the Kerberos realm, kdc and
# conf entries are set; elsewhere realm and kdc are set to empty strings
# and no conf entry is added.
def pig_options
  access_key = ENV['AWS_ACCESS_KEY']
  secret_key = ENV['AWS_SECRET_KEY']

  opts = {
    'fs.s3n.awsAccessKeyId' => access_key,
    'fs.s3n.awsSecretAccessKey' => secret_key,
    'fs.s3.awsAccessKeyId' => access_key,
    'fs.s3.awsSecretAccessKey' => secret_key,
    'pig.events.logformat' => PIG_LOG_FORMAT,
    'pig.logfile' => local_log_dir + "/local-pig.log",
    'pig.udf.scripting.log.dir' => local_udf_log_dir,
    'python.verbose' => 'error',
    'jython.output' => true,
    'python.home' => jython_directory,
    'python.path' => "#{local_install_directory}/../controlscripts/lib:#{local_install_directory}/../vendor/controlscripts/lib",
    'python.cachedir' => jython_cache_directory
  }

  if osx?
    opts['java.security.krb5.realm'] = 'OX.AC.UK'
    opts['java.security.krb5.kdc'] = 'kdc0.ox.ac.uk:kdc1.ox.ac.uk'
    opts['java.security.krb5.conf'] = '/dev/null'
  else
    opts['java.security.krb5.realm'] = ''
    opts['java.security.krb5.kdc'] = ''
  end

  opts
end

#run_pig_command(cmd, pig_version, parameters = nil, jython_output = true) ⇒ Object

Run pig with the specified command. The ‘command’ is anything that can be appended to the command line invocation of Pig that will get it to do something interesting, such as ‘-f some-file.pig’.



312
313
314
315
316
# File 'lib/mortar/local/pig.rb', line 312

# Renders and runs the pig bash script template for the given sub-command.
# The 'jython.output' pig option is overridden with +jython_output+ before
# the script runs. Returns whatever run_templated_script returns.
def run_pig_command(cmd, pig_version, parameters = nil, jython_output = true)
  script_params = pig_command_script_template_parameters(cmd, pig_version, parameters)
  script_params['pig_opts']['jython.output'] = jython_output
  run_templated_script(pig_command_script_template_path, script_params)
end

#run_script(pig_script, pig_version, pig_parameters) ⇒ Object

run the pig script with user supplied pig parameters



201
202
203
# File 'lib/mortar/local/pig.rb', line 201

# Runs the given pig script file ("-f <path>") with the user supplied pig
# parameters and jython output enabled.
def run_script(pig_script, pig_version, pig_parameters)
  sub_command = " -f #{pig_script.path}"
  run_pig_command(sub_command, pig_version, pig_parameters, true)
end

#should_do_lib_install?Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/mortar/local/pig.rb', line 109

# True when nothing exists at lib_directory, i.e. the common libraries
# have not been installed yet.
#
# Uses File.exist?; the File.exists? alias is no longer available on
# current Ruby versions.
def should_do_lib_install?
  !File.exist?(lib_directory)
end

#should_do_lib_update?Boolean

Returns:

  • (Boolean)


119
120
121
# File 'lib/mortar/local/pig.rb', line 119

# Asks is_newer_version whether the 'lib-common' archive at
# lib_archive_url is newer than what is installed, and returns its answer.
def should_do_lib_update?
  archive_url = lib_archive_url
  is_newer_version('lib-common', archive_url)
end

#should_do_pig_install?(pig_version) ⇒ Boolean

Determines if a pig install needs to occur, true if no pig install present

Returns:

  • (Boolean)


105
106
107
# File 'lib/mortar/local/pig.rb', line 105

# True when nothing exists at the install directory of the given pig
# version, i.e. that version has not been installed yet.
#
# Uses File.exist?; the File.exists? alias is no longer available on
# current Ruby versions.
def should_do_pig_install?(pig_version)
  !File.exist?(pig_directory(pig_version))
end

#should_do_pig_update?(pig_version, command = nil) ⇒ Boolean

Determines if a pig update needs to occur, true if server side pig tgz is newer than date of the existing install

Returns:

  • (Boolean)


115
116
117
# File 'lib/mortar/local/pig.rb', line 115

# Asks is_newer_version whether the archive for the given pig version is
# newer than what is installed, and returns its answer.
#
# command - optional value passed through to is_newer_version.
def should_do_pig_update?(pig_version, command=nil)
  version_name = pig_version.pig_version
  archive_url = pig_archive_url(pig_version)
  is_newer_version(version_name, archive_url, command)
end

#show_illustrate_output_browser(illustrate_outpath) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/mortar/local/pig.rb', line 229

# Renders the illustrate JSON dump at +illustrate_outpath+ into an HTML
# report under "illustrate-output/" (relative to the current directory),
# copies the static assets the report needs, and opens the report in a
# browser.
def show_illustrate_output_browser(illustrate_outpath)
  ensure_dir_exists("illustrate-output")
  ensure_dir_exists("illustrate-output/resources")
  ensure_dir_exists("illustrate-output/resources/css")
  ensure_dir_exists("illustrate-output/resources/js")
  ensure_dir_exists("illustrate-output/resources/flash")

  ["illustrate_css",
   "jquery", "jquery_transit", "jquery_stylestack",
   "mortar_table", "zeroclipboard", "zeroclipboard_swf"].each { |resource|
    copy_if_not_present_at_dest(@resource_locations[resource], @resource_destinations[resource])
  }

  # Pull in the dumped json file
  illustrate_data_json_text = decode_illustrate_input_file(illustrate_outpath)
  illustrate_data = json_decode(illustrate_data_json_text)

  # Render a template using its values
  template_params = create_illustrate_template_parameters(illustrate_data)

  template_source = File.read(@resource_locations["illustrate_template"])
  # ERB's positional safe_level/trim_mode arguments are deprecated and are
  # rejected by newer ERB releases; use the keyword form whenever this ERB
  # accepts keywords and fall back to the positional form otherwise.
  erb = if ERB.instance_method(:initialize).parameters.assoc(:key)
    ERB.new(template_source, :trim_mode => "%<>")
  else
    ERB.new(template_source, 0, "%<>")
  end
  html = erb.result(BindingClazz.new(template_params).get_binding)

  # Write the rendered template out to a file
  File.open(@resource_destinations["illustrate_html"], 'w') { |f|
    f.write(html)
  }

  # Open a browser pointing to the rendered template output file
  action("Opening illustrate results from #{@resource_destinations["illustrate_html"]} ") do
    require "launchy"
    Launchy.open(File.expand_path(@resource_destinations["illustrate_html"]))
  end
end

#template_params_classpath(pig_version = nil) ⇒ Object



323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
# File 'lib/mortar/local/pig.rb', line 323

# Colon-separated classpath for the pig script template: the pig install
# directory itself, the pig and lib-common jar directories, the jython
# jar, the jets3t properties file and the project's lib directory.
#
# pig_version - optional; when nil a Pig012Hadoop273 version object is
#               used so that old watchtower plugins that don't set
#               pig_version keep working.
def template_params_classpath(pig_version=nil)
  pig_version = Mortar::PigVersion::Pig012Hadoop273.new if pig_version.nil?

  pig_home = pig_directory(pig_version)
  common_lib = lib_directory

  entries = [
    "#{pig_home}/*",
    "#{pig_home}/lib-local/*",
    "#{common_lib}/lib-local/*",
    "#{pig_home}/lib-pig/*",
    "#{pig_home}/lib-cluster/*",
    "#{common_lib}/lib-pig/*",
    "#{common_lib}/lib-cluster/*",
    "#{jython_directory}/jython.jar",
    "#{common_lib}/conf/jets3t.properties",
    "#{project_root}/lib/*"
  ]
  entries.join(":")
end

#validate_script(pig_script, pig_version, pig_parameters) ⇒ Object



180
181
182
# File 'lib/mortar/local/pig.rb', line 180

# Runs pig with "-check <path>" for the given script file, using the user
# supplied pig parameters. Returns run_pig_command's result.
def validate_script(pig_script, pig_version, pig_parameters)
  sub_command = " -check #{pig_script.path}"
  run_pig_command(sub_command, pig_version, pig_parameters)
end