Class: Bosh::AwsCloud::Cloud

Inherits:
Cloud
  • Object
show all
Includes:
Helpers
Defined in:
lib/cloud/aws/cloud.rb

Constant Summary collapse

DEFAULT_MAX_RETRIES =

default maximum number of times to retry an AWS API call

2
METADATA_TIMEOUT =

in seconds

5
DEVICE_POLL_TIMEOUT =

in seconds

60

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Helpers

#cloud_error, #extract_security_group_names

Constructor Details

#initialize(options) ⇒ Cloud

Initialize BOSH AWS CPI. The contents of sub-hashes are defined in the README

Parameters:

  • options (Hash)

    CPI options

Options Hash (options):

  • aws (Hash)

    AWS specific options

  • agent (Hash)

    agent options

  • registry (Hash)

    registry options



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/cloud/aws/cloud.rb', line 25

# Builds the CPI instance from the supplied options.
#
# @param options [Hash] CPI options; a frozen shallow copy is stored in @options
def initialize(options)
  # Keep a private, frozen top-level copy (nested hashes are neither
  # duplicated nor frozen by dup.freeze).
  @options = options.dup.freeze
  validate_options

  @logger = Bosh::Clouds::Config.logger

  initialize_aws
  initialize_registry

  # Guards the memoized instance id lookup performed by #current_vm_id.
  @metadata_lock = Mutex.new
end

Instance Attribute Details

#ec2Object (readonly)

Returns the value of attribute ec2.



14
15
16
# File 'lib/cloud/aws/cloud.rb', line 14

# Reader for @ec2, the object other methods in this class query for
# instances, volumes and snapshots.
def ec2
  @ec2
end

#loggerObject

Returns the value of attribute logger.



17
18
19
# File 'lib/cloud/aws/cloud.rb', line 17

# Reader for @logger, which #initialize takes from Bosh::Clouds::Config.logger.
def logger
  @logger
end

#optionsObject (readonly)

Returns the value of attribute options.



16
17
18
# File 'lib/cloud/aws/cloud.rb', line 16

# Reader for @options, the frozen copy of the options hash given to #initialize.
def options
  @options
end

#registryObject (readonly)

Returns the value of attribute registry.



15
16
17
# File 'lib/cloud/aws/cloud.rb', line 15

# Reader for @registry (presumably assigned by initialize_registry, which is
# not shown here - confirm).
def registry
  @registry
end

Instance Method Details

#attach_disk(instance_id, disk_id) ⇒ Object

Attach an EBS volume to an EC2 instance

Parameters:

  • instance_id (String)

    EC2 instance id of the virtual machine to attach the disk to

  • disk_id (String)

    EBS volume id of the disk to attach



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/cloud/aws/cloud.rb', line 219

# Attaches an EBS volume to an EC2 instance and records the resulting device
# name in the agent settings under disks -> persistent -> disk_id.
#
# @param instance_id [String] EC2 instance id of the VM to attach the disk to
# @param disk_id [String] EBS volume id of the disk to attach
def attach_disk(instance_id, disk_id)
  with_thread_name("attach_disk(#{instance_id}, #{disk_id})") do
    vm = @ec2.instances[instance_id]
    ebs = @ec2.volumes[disk_id]

    device = attach_ebs_volume(vm, ebs)

    update_agent_settings(vm) do |settings|
      # Create the nested hashes on demand, then register the new disk.
      disks = (settings["disks"] ||= {})
      persistent = (disks["persistent"] ||= {})
      persistent[disk_id] = device
    end
    logger.info("Attached `#{disk_id}' to `#{instance_id}'")
  end
end

#compare_private_ip_addresses(instance, specified_ip_address) ⇒ void

This method returns an undefined value.

Compares actual instance private IP addresses with the IP address specified at the network spec

You can't change the IP address of a running server, so we need to send the InstanceUpdater a request to do it for us.

Parameters:

  • instance (AWS::EC2::Instance)

    EC2 instance

  • specified_ip_address (String)

    IP address specified at the network spec (if Manual Network)

Raises:

  • (Bosh::Clouds::NotSupported)

    If the IP address changes; the VM must then be recreated, because you can't change the IP address of a running server



357
358
359
360
361
362
363
364
365
# File 'lib/cloud/aws/cloud.rb', line 357

# Checks the instance's current private IP address against the one requested
# in the network spec.
#
# @param instance [AWS::EC2::Instance] EC2 instance
# @param specified_ip_address [String, nil] requested IP address; nil means
#   no particular address was requested
# @raise [Bosh::Clouds::NotSupported] when an address was requested and it
#   differs from the instance's current one
# @return [void]
def compare_private_ip_addresses(instance, specified_ip_address)
  current_ip = instance.private_ip_address

  # Nothing requested, or already on the requested address: nothing to do.
  return if specified_ip_address.nil?
  return if current_ip == specified_ip_address

  raise Bosh::Clouds::NotSupported,
        format("IP address change requires VM recreation: %s to %s",
               current_ip, specified_ip_address)
end

#compare_security_groups(instance, network_spec) ⇒ Object

If the security groups change, we need to recreate the VM, because you can't change the security groups of a running instance; we need to send the InstanceUpdater a request to do it for us.



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/cloud/aws/cloud.rb', line 333

# Checks the instance's current security groups against the ones the network
# spec asks for. When the spec names none, the "default_security_groups"
# entry of aws_properties is used instead.
#
# @param instance [AWS::EC2::Instance] EC2 instance
# @param network_spec [Hash] network properties
# @raise [Bosh::Clouds::NotSupported] when the two sets of group names differ
# @return [void]
def compare_security_groups(instance, network_spec)
  current = instance.security_groups.map { |group| group.name }
  requested = extract_security_group_names(network_spec)
  desired = requested.empty? ? Array(aws_properties["default_security_groups"]) : requested

  # Order is irrelevant; only the set of names has to match.
  return if current.sort == desired.sort

  raise Bosh::Clouds::NotSupported,
        format("security groups change requires VM recreation: %s to %s",
               current.join(", "), desired.join(", "))
end

#configure_networks(instance_id, network_spec) ⇒ Object

Configure network for an EC2 instance

Parameters:

  • instance_id (String)

    EC2 instance id

  • network_spec (Hash)

    network properties

Raises:

  • (Bosh::Clouds::NotSupported)

    if there’s a network change that requires the recreation of the VM



310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# File 'lib/cloud/aws/cloud.rb', line 310

# Applies a new network spec to an EC2 instance and stores the spec in the
# agent settings under "networks".
#
# @param instance_id [String] EC2 instance id
# @param network_spec [Hash] network properties
# @raise [Bosh::Clouds::NotSupported] raised by the two compare_* checks when
#   the security groups or the private IP address would have to change
def configure_networks(instance_id, network_spec)
  with_thread_name("configure_networks(#{instance_id}, ...)") do
    logger.info("Configuring '#{instance_id}' to use new network settings: #{network_spec.pretty_inspect}")

    vm = @ec2.instances[instance_id]
    configurator = NetworkConfigurator.new(network_spec)

    # Both checks run before anything is reconfigured.
    compare_security_groups(vm, network_spec)
    compare_private_ip_addresses(vm, configurator.private_ip)

    configurator.configure(@ec2, vm)

    update_agent_settings(vm) { |settings| settings["networks"] = network_spec }
  end
end

#create_disk(size, instance_id = nil) ⇒ String

Creates a new EBS volume

Parameters:

  • size (Integer)

    disk size in MiB

  • instance_id (optional, String) (defaults to: nil)

    EC2 instance id of the VM that this disk will be attached to

Returns:

  • (String)

    created EBS volume id



155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/cloud/aws/cloud.rb', line 155

# Creates a new EBS volume and waits until it is available.
#
# @param size [Integer] disk size in MiB (rounded up to whole GiB for EC2)
# @param instance_id [String, nil] EC2 instance id of the VM the disk will be
#   attached to; handed to the availability zone selector
# @return [String] id of the created EBS volume
def create_disk(size, instance_id = nil)
  with_thread_name("create_disk(#{size}, #{instance_id})") do
    validate_disk_size(size)

    volumes = @ec2.volumes
    size_in_gib = (size / 1024.0).ceil
    # A disk created for an instance must live in that instance's zone.
    zone = @az_selector.select_availability_zone(instance_id)
    volume = volumes.create(:size => size_in_gib, :availability_zone => zone)

    logger.info("Creating volume '#{volume.id}'")
    ResourceWait.for_volume(volume: volume, state: :available)

    volume.id
  end
end

#create_stemcell(image_path, stemcell_properties) ⇒ String

Creates a new EC2 AMI using stemcell image. This method can only be run on an EC2 instance, as image creation involves creating and mounting new EBS volume as local block device.

Parameters:

  • image_path (String)

    local filesystem path to a stemcell image

  • stemcell_properties (Hash)

    AWS-specific stemcell properties

Returns:

  • (String)

    EC2 AMI name of the stemcell



383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
# File 'lib/cloud/aws/cloud.rb', line 383

# Creates a stemcell from a local image by writing it to a temporary EBS
# volume attached to the VM this code is running on.
#
# @param image_path [String] local filesystem path to a stemcell image
# @param stemcell_properties [Hash] stemcell properties; "disk" (MiB) sets the
#   size of the temporary volume and defaults to 2048
# @return [String] id of the object returned by the StemcellCreator
def create_stemcell(image_path, stemcell_properties)
  with_thread_name("create_stemcell(#{image_path}...)") do
    creator = StemcellCreator.new(region, stemcell_properties)

    # A "fake" stemcell needs no volume work; return its id straight away.
    return creator.fake.id if creator.fake?

    begin
      # These two variables are used in the 'ensure' clause
      instance = nil
      volume = nil

      # 1. Create and mount new EBS volume (2GB default)
      disk_size = stemcell_properties["disk"] || 2048
      volume_id = create_disk(disk_size, current_vm_id)
      volume = @ec2.volumes[volume_id]
      instance = @ec2.instances[current_vm_id]

      sd_name = attach_ebs_volume(instance, volume)
      ebs_volume = find_ebs_device(sd_name)

      logger.info("Creating stemcell with: '#{volume.id}' and '#{stemcell_properties.inspect}'")
      creator.create(volume, ebs_volume, image_path).id
    rescue => e
      # Log the failure, then let the caller see the original exception.
      logger.error(e)
      raise e
    ensure
      # Always release the temporary volume once both handles were obtained,
      # whether or not the stemcell was created.
      if instance && volume
        detach_ebs_volume(instance, volume, true)
        delete_disk(volume.id)
      end
    end
  end
end

#create_vm(agent_id, stemcell_id, resource_pool, network_spec, disk_locality = nil, environment = nil) ⇒ String

Create an EC2 instance and wait until it’s in running state

Parameters:

  • agent_id (String)

    agent id associated with new VM

  • stemcell_id (String)

    AMI id of the stemcell used to create the new instance

  • resource_pool (Hash)

    resource pool specification

  • network_spec (Hash)

    network specification, if it contains security groups they must already exist

  • disk_locality (optional, Array) (defaults to: nil)

    list of disks that might be attached to this instance in the future, can be used as a placement hint (i.e. instance will only be created if resource pool availability zone is the same as disk availability zone)

  • environment (optional, Hash) (defaults to: nil)

    data to be merged into agent settings

Returns:

  • (String)

    EC2 instance id of the new virtual machine



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/cloud/aws/cloud.rb', line 81

# Creates an EC2 instance, configures its network and publishes the initial
# agent settings to the registry.
#
# @param agent_id [String] agent id associated with the new VM
# @param stemcell_id [String] id of the stemcell used to create the instance
# @param resource_pool [Hash] resource pool specification
# @param network_spec [Hash] network specification
# @param disk_locality [Array, nil] disks that may later be attached to this
#   instance; nil is passed on as an empty list
# @param environment [Hash, nil] data to be merged into agent settings
# @return [String] EC2 instance id of the new virtual machine
# @raise [StandardError] any failure is logged and re-raised; an instance
#   that was already created is terminated first
def create_vm(agent_id, stemcell_id, resource_pool, network_spec, disk_locality = nil, environment = nil)
  with_thread_name("create_vm(#{agent_id}, ...)") do
    # Resolve the stemcell first so a bad id fails before anything is created.
    stemcell = StemcellFinder.find_by_region_and_id(region, stemcell_id)

    begin
      manager = InstanceManager.new(region, registry, az_selector)
      vm = manager.create(
        agent_id, stemcell.image_id, resource_pool, network_spec, (disk_locality || []), environment, options
      )

      logger.info("Creating new instance '#{vm.id}'")

      NetworkConfigurator.new(network_spec).configure(region, vm)

      settings = initial_agent_settings(agent_id, network_spec, environment, stemcell.root_device_name)
      registry.update_settings(vm.id, settings)

      vm.id
    rescue => e # is this rescuing too much?
      logger.error("Failed to create instance: #{e.message}\n#{e.backtrace.join("\n")}")
      # Do not leave a half-configured instance behind.
      manager.terminate(vm.id, fast_path_delete?) if vm
      raise e
    end
  end
end

#current_vm_idObject

Reads current instance id from EC2 metadata. We are assuming instance id cannot change while current process is running and thus memoizing it.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/cloud/aws/cloud.rb', line 41

# Returns the id of the EC2 instance this process runs on, read from the
# instance metadata endpoint. The value is memoized in @current_vm_id and the
# lookup is serialized through @metadata_lock.
#
# @return [String] body of the metadata response
def current_vm_id
  @metadata_lock.synchronize do
    @current_vm_id ||= begin
      http = HTTPClient.new
      http.connect_timeout = METADATA_TIMEOUT
      # 169.254.169.254 is the EC2 convention for instance metadata.
      reply = http.get("http://169.254.169.254/latest/meta-data/instance-id/")

      if reply.status != 200
        cloud_error("Instance metadata endpoint returned HTTP #{reply.status}")
      end

      reply.body
    end
  end
rescue HTTPClient::TimeoutError
  cloud_error("Timed out reading instance metadata, please make sure CPI is running on EC2 instance")
end

#default_ec2_endpointObject



112
113
114
# File 'lib/cloud/aws/cloud.rb', line 112

# Builds the default EC2 endpoint host name. The region segment is left out
# when aws_region is nil.
#
# @return [String] e.g. "ec2.<region>.amazonaws.com"
def default_ec2_endpoint
  segments = ['ec2', aws_region, 'amazonaws.com']
  segments.compact.join('.')
end

#default_elb_endpointObject



116
117
118
# File 'lib/cloud/aws/cloud.rb', line 116

# Builds the default Elastic Load Balancing endpoint host name. The region
# segment is left out when aws_region is nil.
#
# @return [String] e.g. "elasticloadbalancing.<region>.amazonaws.com"
def default_elb_endpoint
  segments = ['elasticloadbalancing', aws_region, 'amazonaws.com']
  segments.compact.join('.')
end

#delete_disk(disk_id) ⇒ Object

Delete EBS volume

Parameters:

  • disk_id (String)

    EBS volume id

Raises:

  • (Bosh::Clouds::CloudError)

    if disk is not in available state



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/cloud/aws/cloud.rb', line 181

# Deletes an EBS volume, retrying while EC2 reports the volume as in use.
#
# @param disk_id [String] EBS volume id
# @return [void]
def delete_disk(disk_id)
  with_thread_name("delete_disk(#{disk_id})") do
    volume = @ec2.volumes[disk_id]

    logger.info("Deleting volume `#{volume.id}'")

    tries = 10
    sleep_cb = ResourceWait.sleep_callback("Waiting for volume `#{volume.id}' to be deleted", tries)
    # Turns "all attempts used up" into a cloud error.
    # NOTE(review): relies on retryable passing the attempt count to the
    # ensure callback - confirm against Bosh::Common.retryable.
    ensure_cb = Proc.new do |retries|
      cloud_error("Timed out waiting to delete volume `#{volume.id}'") if retries == tries
    end
    error = AWS::EC2::Errors::Client::VolumeInUse

    Bosh::Common.retryable(tries: tries, sleep: sleep_cb, on: error, ensure: ensure_cb) do
      volume.delete
      true # return true to only retry on Exceptions
    end

    # Fast path: tag the volume and return without waiting for EC2 to report
    # it as deleted.
    if fast_path_delete?
      begin
        TagManager.tag(volume, "Name", "to be deleted")
        logger.info("Volume `#{disk_id}' has been marked for deletion")
      rescue AWS::EC2::Errors::InvalidVolume::NotFound
        # Once in a blue moon AWS is actually fast enough that the volume is already gone
        # when we get here, and if it is, our work here is done!
      end
      return
    end

    ResourceWait.for_volume(volume: volume, state: :deleted)

    logger.info("Volume `#{disk_id}' has been deleted")
  end
end

#delete_snapshot(snapshot_id) ⇒ Object

Delete a disk snapshot

Parameters:

  • snapshot_id (String)

    snapshot id to delete



293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/cloud/aws/cloud.rb', line 293

# Deletes a disk snapshot.
#
# @param snapshot_id [String] snapshot id to delete
# @raise [Bosh::Clouds::CloudError] when the snapshot's status is :in_use
def delete_snapshot(snapshot_id)
  with_thread_name("delete_snapshot(#{snapshot_id})") do
    snap = @ec2.snapshots[snapshot_id]

    # Refuse to remove a snapshot that is still being used.
    in_use = snap.status == :in_use
    raise Bosh::Clouds::CloudError, "snapshot '#{snap.id}' can not be deleted as it is in use" if in_use

    snap.delete
    logger.info("snapshot '#{snapshot_id}' deleted")
  end
end

#delete_stemcell(stemcell_id) ⇒ Object

Delete a stemcell and the accompanying snapshots

Parameters:

  • stemcell_id (String)

    EC2 AMI name of the stemcell to be deleted



419
420
421
422
423
424
# File 'lib/cloud/aws/cloud.rb', line 419

# Looks up a stemcell in the current region and deletes it.
#
# @param stemcell_id [String] id of the stemcell to be deleted
def delete_stemcell(stemcell_id)
  with_thread_name("delete_stemcell(#{stemcell_id})") do
    StemcellFinder.find_by_region_and_id(region, stemcell_id).delete
  end
end

#delete_vm(instance_id) ⇒ Object

Delete EC2 instance (“terminate” in AWS language) and wait until it reports as terminated

Parameters:

  • instance_id (String)

    EC2 instance id



124
125
126
127
128
129
# File 'lib/cloud/aws/cloud.rb', line 124

# Terminates an EC2 instance through InstanceManager#terminate, passing
# along the fast_path_delete? flag.
#
# @param instance_id [String] EC2 instance id
def delete_vm(instance_id)
  with_thread_name("delete_vm(#{instance_id})") do
    logger.info("Deleting instance '#{instance_id}'")
    manager = InstanceManager.new(region, registry)
    manager.terminate(instance_id, fast_path_delete?)
  end
end

#detach_disk(instance_id, disk_id) ⇒ Object

Detach an EBS volume from an EC2 instance

Parameters:

  • instance_id (String)

    EC2 instance id of the virtual machine to detach the disk from

  • disk_id (String)

    EBS volume id of the disk to detach



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/cloud/aws/cloud.rb', line 238

# Removes the disk from the agent settings (disks -> persistent) and then
# detaches the EBS volume from the EC2 instance.
#
# @param instance_id [String] EC2 instance id of the VM to detach the disk from
# @param disk_id [String] EBS volume id of the disk to detach
def detach_disk(instance_id, disk_id)
  with_thread_name("detach_disk(#{instance_id}, #{disk_id})") do
    vm = @ec2.instances[instance_id]
    ebs = @ec2.volumes[disk_id]

    # The settings are updated first, before the volume is actually detached.
    update_agent_settings(vm) do |settings|
      disks = (settings["disks"] ||= {})
      persistent = (disks["persistent"] ||= {})
      persistent.delete(disk_id)
    end

    detach_ebs_volume(vm, ebs)

    logger.info("Detached `#{disk_id}' from `#{instance_id}'")
  end
end

#find_ebs_device(sd_name) ⇒ Object



457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/cloud/aws/cloud.rb', line 457

# Waits for an attached EBS volume to show up as a local block device. The
# given /dev/sd* name is tried first, then its /dev/xvd* equivalent; the pair
# is polled once per second, DEVICE_POLL_TIMEOUT times.
#
# @param sd_name [String] device name, e.g. "/dev/sdf"
# @return [String] the first of the two names that is a block device
def find_ebs_device(sd_name)
  xvd_name = sd_name.gsub(/^\/dev\/sd/, "/dev/xvd")
  candidates = [sd_name, xvd_name]

  DEVICE_POLL_TIMEOUT.times do
    present = candidates.find { |path| File.blockdev?(path) }
    return present if present

    sleep(1)
  end

  cloud_error("Cannot find EBS volume on current instance")
end

#get_disks(vm_id) ⇒ Object



255
256
257
258
259
260
261
262
263
# File 'lib/cloud/aws/cloud.rb', line 255

# Lists the EBS volume ids found in an instance's block device mappings.
# Mappings without an :ebs entry are skipped.
#
# @param vm_id [String] EC2 instance id
# @return [Array] volume ids, in block device order
def get_disks(vm_id)
  @ec2.instances[vm_id].block_devices.each_with_object([]) do |device, volume_ids|
    ebs = device[:ebs]
    volume_ids << ebs[:volume_id] if ebs
  end
end

#has_vm?(instance_id) ⇒ Boolean

Has EC2 instance

Parameters:

  • instance_id (String)

    EC2 instance id

Returns:

  • (Boolean)


143
144
145
146
147
# File 'lib/cloud/aws/cloud.rb', line 143

# Asks InstanceManager#has_instance? whether the given EC2 instance exists.
#
# @param instance_id [String] EC2 instance id
# @return [Boolean] whatever has_instance? reports
def has_vm?(instance_id)
  with_thread_name("has_vm?(#{instance_id})") do
    manager = InstanceManager.new(region, registry)
    manager.has_instance?(instance_id)
  end
end

#reboot_vm(instance_id) ⇒ Object

Reboot EC2 instance

Parameters:

  • instance_id (String)

    EC2 instance id



134
135
136
137
138
# File 'lib/cloud/aws/cloud.rb', line 134

# Reboots an EC2 instance through InstanceManager#reboot.
#
# @param instance_id [String] EC2 instance id
def reboot_vm(instance_id)
  with_thread_name("reboot_vm(#{instance_id})") do
    manager = InstanceManager.new(region, registry)
    manager.reboot(instance_id)
  end
end

#set_vm_metadata(vm, metadata) ⇒ void

This method returns an undefined value.

Add tags to an instance. In addition to the supplied tags, it adds a ‘Name’ tag, as that is what is shown in the AWS console.

Parameters:

  • vm (String)

    vm id that was once returned by #create_vm

  • metadata (Hash)

    metadata key/value pairs



431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
# File 'lib/cloud/aws/cloud.rb', line 431

# Tags an EC2 instance with every metadata key/value pair and, when
# possible, with a "Name" tag: "<job>/<index>" if both :job and :index are
# present, otherwise "compiling/<value>" if :compiling is present.
#
# Hitting the EC2 tag limit is logged and otherwise ignored, so tagging never
# fails the caller.
#
# @param vm [String] vm id that was once returned by #create_vm
# @param metadata [Hash] metadata key/value pairs
# @return [void]
def set_vm_metadata(vm, metadata)
  instance = @ec2.instances[vm]

  metadata.each_pair do |key, value|
    TagManager.tag(instance, key, value)
  end

  job = metadata[:job]
  index = metadata[:index]

  if job && index
    name = "#{job}/#{index}"
  elsif metadata[:compiling]
    name = "compiling/#{metadata[:compiling]}"
  end
  TagManager.tag(instance, "Name", name) if name
rescue AWS::EC2::Errors::TagLimitExceeded => e
  logger.error("could not tag #{instance.id}: #{e.message}")
end

#snapshot_disk(disk_id, metadata) ⇒ String

Take snapshot of disk

Parameters:

  • disk_id (String)

    disk id of the disk to take the snapshot of

Returns:

  • (String)

    snapshot id



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/cloud/aws/cloud.rb', line 268

# Takes a snapshot of an EBS volume, tags it, and waits for it to complete.
#
# The snapshot description (also used as its "Name" tag) is
# "<deployment>/<job>/<index>", followed by the last path segment of the
# first attachment's device when the volume is attached.
#
# @param disk_id [String] id of the disk to take the snapshot of
# @param metadata [Hash] values read by symbol key: :deployment, :job and
#   :index for the name; :agent_id, :instance_id, :director_name and
#   :director_uuid are copied onto the snapshot as tags
# @return [String] snapshot id
def snapshot_disk(disk_id, metadata)
  with_thread_name("snapshot_disk(#{disk_id})") do
    volume = @ec2.volumes[disk_id]
    devices = volume.attachments.map { |attachment| attachment.device }

    name = [:deployment, :job, :index].map { |key| metadata[key] }
    name << devices.first.split('/').last unless devices.empty?
    description = name.join('/')

    snapshot = volume.create_snapshot(description)
    logger.info("snapshot '#{snapshot.id}' of volume '#{disk_id}' created")

    [:agent_id, :instance_id, :director_name, :director_uuid].each do |key|
      TagManager.tag(snapshot, key, metadata[key])
    end
    TagManager.tag(snapshot, :device, devices.first) unless devices.empty?
    TagManager.tag(snapshot, 'Name', description)

    ResourceWait.for_snapshot(snapshot: snapshot, state: :completed)
    snapshot.id
  end
end

#validate_deployment(old_manifest, new_manifest) ⇒ Object

Note:

Not implemented in the AWS CPI



452
453
454
455
# File 'lib/cloud/aws/cloud.rb', line 452

# Deployment validation hook; this CPI does not implement it and simply
# calls not_implemented.
#
# @param old_manifest [Object] unused
# @param new_manifest [Object] unused
def validate_deployment(old_manifest, new_manifest)
  # The vSphere CPI does not implement this either
  not_implemented(:validate_deployment)
end

#validate_disk_size(size) ⇒ Object

Raises:

  • (ArgumentError)


170
171
172
173
174
175
# File 'lib/cloud/aws/cloud.rb', line 170

# Checks that a requested disk size is an Integer number of MiB between
# 1 GiB and 1 TiB inclusive.
#
# @param size [Integer] disk size in MiB
# @raise [ArgumentError] when size is not an Integer
# @return [void] out-of-range sizes are reported through cloud_error
def validate_disk_size(size)
  raise ArgumentError, "disk size needs to be an integer" unless size.kind_of?(Integer)

  cloud_error("AWS CPI minimum disk size is 1 GiB") if size < 1024
  # 1 TiB is 1024 GiB, i.e. 1024 * 1024 MiB; the former 1024 * 1000 bound
  # rejected valid sizes between 1000 GiB and 1 TiB.
  cloud_error("AWS CPI maximum disk size is 1 TiB") if size > 1024 * 1024
end