Module: Bosh::Director::CloudcheckHelper
- Included in:
- ProblemHandlers::Base
- Defined in:
- lib/bosh/director/cloudcheck_helper.rb
Constant Summary collapse
- DEFAULT_AGENT_TIMEOUT =
This timeout has been kept fairly short, mainly to avoid long cloudchecks; however, 10 seconds should still be a generous interval for the agent to respond.
10
Instance Method Summary collapse
- #agent_client(vm, timeout = DEFAULT_AGENT_TIMEOUT, retries = 0) ⇒ Object
- #agent_timeout_guard(vm, &block) ⇒ Object
- #cloud ⇒ Object
- #delete_vm(vm) ⇒ Object
- #delete_vm_reference(vm, options = {}) ⇒ Object
- #handler_error(message) ⇒ Object
- #instance_name(vm) ⇒ Object
- #reboot_vm(vm) ⇒ Object
- #recreate_vm(vm) ⇒ Object
Instance Method Details
#agent_client(vm, timeout = DEFAULT_AGENT_TIMEOUT, retries = 0) ⇒ Object
33 34 35 36 37 38 39 40 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 33

# Returns the agent client for the given VM, creating it on first use.
#
# Clients are memoized in @clients keyed by vm.agent_id, so +timeout+ and
# +retries+ only take effect on the first call for a given agent id.
#
# @param vm [#agent_id] VM whose agent should be contacted
# @param timeout [Integer] value passed to the client as :timeout
# @param retries [Integer] retry count registered for :get_state
# @return [Object] the result of AgentClient.with_defaults
def agent_client(vm, timeout = DEFAULT_AGENT_TIMEOUT, retries = 0)
  options = {
    :timeout => timeout,
    :retry_methods => { :get_state => retries }
  }

  @clients ||= {}
  @clients[vm.agent_id] ||= AgentClient.with_defaults(vm.agent_id, options)
end
#agent_timeout_guard(vm, &block) ⇒ Object
42 43 44 45 46 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 42

# Yields the VM's agent client to the block and converts an agent RPC
# timeout into a handler error.
#
# Only Bosh::Director::RpcTimeout is rescued; any other exception raised by
# the block (or by agent_client) propagates unchanged.
#
# @param vm [#cid] VM whose agent the block talks to
# @yieldparam agent [Object] the client returned by agent_client(vm)
# @return [Object] the block's return value
def agent_timeout_guard(vm, &block)
  yield agent_client(vm)
rescue Bosh::Director::RpcTimeout
  handler_error("VM `#{vm.cid}' is not responding")
end
#cloud ⇒ Object
16 17 18 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 16

# Shortcut for the cloud object held by the director configuration.
#
# @return [Object] whatever Bosh::Director::Config.cloud returns
def cloud
  Bosh::Director::Config.cloud
end
#delete_vm(vm) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 57

# Deletes the VM from the cloud and removes its database record.
#
# The agent is asked for its disk list first; if it reports any disk the
# deletion is refused through handler_error and nothing is changed.
#
# @param vm [Object] VM model; must respond to cid, db, instance and destroy
def delete_vm(vm)
  # Paranoia: don't blindly delete VMs with persistent disk
  disk_list = agent_timeout_guard(vm) { |agent| agent.list_disk }
  handler_error("VM has persistent disk attached") unless disk_list.empty?

  cloud.delete_vm(vm.cid)

  # Detach the instance (if any) and drop the VM row in one transaction.
  vm.db.transaction do
    vm.instance.update(:vm => nil) if vm.instance
    vm.destroy
  end
end
#delete_vm_reference(vm, options = {}) ⇒ Object
71 72 73 74 75 76 77 78 79 80 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 71

# Removes the database record of a VM without touching the cloud.
#
# A VM that still has a CID is refused through handler_error unless the
# caller passes :skip_cid_check.
#
# @param vm [Object] VM model; must respond to cid, db, instance and destroy
# @param options [Hash] recognised key: :skip_cid_check (truthy disables
#   the CID guard)
def delete_vm_reference(vm, options = {})
  if vm.cid && !options[:skip_cid_check]
    handler_error("VM has a CID")
  end

  # Detach the instance (if any) and drop the VM row in one transaction.
  vm.db.transaction do
    vm.instance.update(:vm => nil) if vm.instance
    vm.destroy
  end
end
#handler_error(message) ⇒ Object
20 21 22 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 20

# Aborts the current problem handler.
#
# @param message [String] text carried by the raised error
# @raise [Bosh::Director::ProblemHandlerError] always
def handler_error(message)
  raise Bosh::Director::ProblemHandlerError, message
end
#instance_name(vm) ⇒ Object
24 25 26 27 28 29 30 31 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 24

# Builds a human-readable "job/index" label for the instance behind a VM.
#
# @param vm [#instance] VM model
# @return [String] "Unknown VM" when the VM has no instance; otherwise
#   "job/index", with "unknown job" / "unknown index" standing in for
#   missing (nil or false) values
def instance_name(vm)
  instance = vm.instance
  return "Unknown VM" if instance.nil?

  job = instance.job || "unknown job"
  index = instance.index || "unknown index"
  "#{job}/#{index}"
end
#reboot_vm(vm) ⇒ Object
48 49 50 51 52 53 54 55 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 48

# Reboots the VM through the cloud and waits for its agent to come back.
#
# @param vm [#cid] VM to reboot
def reboot_vm(vm)
  cloud.reboot_vm(vm.cid)

  # The rescue deliberately covers only the agent wait: an error raised by
  # the cloud reboot call itself is not translated.
  begin
    agent_client(vm).wait_until_ready
  rescue Bosh::Director::RpcTimeout
    handler_error("Agent still unresponsive after reboot")
  end
end
#recreate_vm(vm) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/bosh/director/cloudcheck_helper.rb', line 82

# Deletes the VM and creates a replacement from the last applied spec.
#
# Steps, in order: validate spec/env, delete the old VM from the cloud
# (tolerating "not found"), drop its DB record, create the new VM, wait for
# its agent, re-attach and mount the persistent disk (if any), apply the
# spec, and start the job when the instance state is "started".
#
# NOTE(review): relies on validate_spec, validate_env, find_stemcell and
# @logger being provided by the including class — confirm.
#
# @param vm [Object] VM model to replace
def recreate_vm(vm)
  # Best we can do without any feedback from the agent
  # is to use the spec persisted in the DB at the time
  # of last apply call.
  # This method is somewhat similar in its nature to what
  # InstanceUpdater is doing in case of the stemcell update,
  # however we don't need to handle some advanced scenarios
  # such as disk migration.

  spec = validate_spec(vm)
  env = validate_env(vm)

  resource_pool_spec = spec.fetch("resource_pool", {})
  stemcell = find_stemcell(resource_pool_spec.fetch("stemcell", {}))

  deployment = vm.deployment
  handler_error("VM doesn't belong to any deployment") unless deployment

  instance = vm.instance
  disk_cid = instance ? instance.persistent_disk_cid : nil

  # One situation where this handler is actually useful is when
  # VM has already been deleted but something failed after that
  # and it is still referenced in DB. In that case it makes sense
  # to ignore "VM not found" errors in `delete_vm' and let the method
  # proceed creating a new VM. Other errors are not forgiven.
  begin
    cloud.delete_vm(vm.cid)
  rescue Bosh::Clouds::VMNotFound
    @logger.warn("VM '#{vm.cid}' might have already been deleted from the cloud")
  end

  vm.db.transaction do
    instance.update(:vm => nil) if instance
    vm.destroy
  end

  cloud_properties = resource_pool_spec.fetch("cloud_properties", {})
  networks = spec["networks"]
  new_vm = VmCreator.create(deployment, stemcell, cloud_properties, networks, Array(disk_cid), env)
  new_vm.apply_spec = spec
  new_vm.save

  if instance
    instance.update(:vm => new_vm)

    # refresh metadata after new instance has been set
    VmMetadataUpdater.build.update(new_vm, {})
  end

  agent_client(new_vm).wait_until_ready

  # After this point agent is actually responding to
  # pings, so if the rest of this handler fails
  # bcck won't find this type of problem again
  # but regular deployment will fail with "out-of-sync"
  # error (as we now have an instance that points to
  # VM that reports empty state). This problem
  # should be handled by "out-of-sync VM" problem handler.

  if disk_cid
    # N.B. attach_disk might fail if disk image is no longer
    # there or for some other reason. Generally it means
    # the data has been lost (e.g. someone deleted VM from vCenter
    # along with the disk).
    cloud.attach_disk(new_vm.cid, disk_cid)
    agent_client(new_vm).mount_disk(disk_cid)
  end

  agent_client(new_vm).apply(spec)

  if instance && instance.state == "started"
    agent_client(new_vm).start
  end
end