Skip to content

Commit

Permalink
Second solution to #369
Browse files Browse the repository at this point in the history
  • Loading branch information
micafer committed Sep 7, 2017
1 parent 229899e commit ea858ba
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 55 deletions.
11 changes: 5 additions & 6 deletions IM/InfrastructureManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from radl import radl_parse
from radl.radl import Feature, RADL
from radl.radl_json import dump_radl as dump_radl_json
from IM.InfrastructureInfo import InfrastructureInfo

if Config.MAX_SIMULTANEOUS_LAUNCHES > 1:
from multiprocessing.pool import ThreadPool
Expand Down Expand Up @@ -159,7 +158,7 @@ def _launch_group(sel_inf, deploy_group, cloud_id, cloud_list, concrete_systems,

cloud = cloud_list[cloud_id]
for deploy in deploy_group:
if not deploy.id.startswith(InfrastructureInfo.FAKE_SYSTEM):
if not deploy.id.startswith(IM.InfrastructureInfo.InfrastructureInfo.FAKE_SYSTEM):
concrete_system = concrete_systems[cloud_id][deploy.id][0]
launched_vms = []
launch_radl = radl.clone()
Expand Down Expand Up @@ -187,8 +186,8 @@ def _launch_group(sel_inf, deploy_group, cloud_id, cloud_list, concrete_systems,
try:
InfrastructureManager.logger.debug(
"Launching %d VMs of type %s" % (deploy.vm_number, concrete_system.name))
launched_vms = cloud.cloud.getCloudConnector(sel_inf).launch(
sel_inf, launch_radl, requested_radl, deploy.vm_number, auth)
launched_vms = cloud.cloud.getCloudConnector(sel_inf).launch_with_retry(
sel_inf, launch_radl, requested_radl, deploy.vm_number, auth, Config.MAX_VM_FAILS)
except Exception as e:
InfrastructureManager.logger.exception("Error launching some of the VMs: %s" % e)
for _ in range(deploy.vm_number):
Expand All @@ -201,7 +200,7 @@ def _launch_group(sel_inf, deploy_group, cloud_id, cloud_list, concrete_systems,
if len(launched_vms) < deploy.vm_number:
for _ in range(deploy.vm_number - len(launched_vms)):
launched_vms.append((False, "Error in deploy: " + deploy.id))

for success, launched_vm in launched_vms:
if success:
InfrastructureManager.logger.debug("VM successfully launched: " + str(launched_vm.id))
Expand Down Expand Up @@ -510,7 +509,7 @@ def AddResource(inf_id, radl_data, auth, context=True, failed_clouds=[]):
reverse=True)
if sorted_scored_clouds and sorted_scored_clouds[0]:
deploys_group_cloud[id(deploy_group)] = sorted_scored_clouds[0][0]
else:
else:
raise Exception("No cloud provider available")

# Launch every group in the same cloud provider
Expand Down
4 changes: 2 additions & 2 deletions IM/VirtualMachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def finalize(self, last, auth):
Finalize the VM
"""
# In case of a VM failed during creation
if self.state == VirtualMachine.FAILED and self.id == None:
if self.state == VirtualMachine.FAILED and self.id is None:
# set as deleted and return
self.destroy = True
return (True, "")
Expand Down Expand Up @@ -460,7 +460,7 @@ def update_status(self, auth, force=False):
"""
with self._lock:
# In case of a VM failed during creation, do not update
if self.state == VirtualMachine.FAILED and self.id == None:
if self.state == VirtualMachine.FAILED and self.id is None:
return False

now = int(time.time())
Expand Down
43 changes: 17 additions & 26 deletions IM/connectors/Azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,34 +612,25 @@ def launch(self, inf, radl, requested_radl, num_vm, auth_data):
subnets = self.create_nets(inf, radl, credentials, subscription_id, "rg-%s" % inf.id)

res = []
remaining_vms = num_vm
retries = 0
while remaining_vms > 0 and retries < Config.MAX_VM_FAILS:
retries += 1
vms = self.create_vms(inf, radl, requested_radl, remaining_vms, location,
storage_account_name, subnets, credentials, subscription_id)

for success, data in vms:
if success:
vm, async_vm_creation = data
try:
self.log_debug("Waiting VM ID %s to be created." % vm.id)
async_vm_creation.wait()
res.append((True, vm))
remaining_vms -= 1
except:
self.log_exception("Error waiting the VM %s." % vm.id)
vms = self.create_vms(inf, radl, requested_radl, num_vm, location,
storage_account_name, subnets, credentials, subscription_id)

self.log_debug("End of retry %d of %d" % (retries, Config.MAX_VM_FAILS))
remaining_vms = num_vm
for success, data in vms:
if success:
vm, async_vm_creation = data
try:
self.log_debug("Waiting VM ID %s to be created." % vm.id)
async_vm_creation.wait()
res.append((True, vm))
remaining_vms -= 1
except Exception as ex:
self.log_exception("Error waiting the VM %s." % vm.id)
res.append((False, "Error waiting the VM %s: %s" % (vm.id, str(ex))))
else:
res.append((False, data))

if remaining_vms > 0:
# Remove the general group
self.log_debug("Delete Inf RG group %s" % "rg-%s" % inf.id)
try:
resource_client.resource_groups.delete("rg-%s" % inf.id)
except:
pass
else:
if remaining_vms == 0:
self.log_debug("All VMs created successfully.")

return res
Expand Down
45 changes: 45 additions & 0 deletions IM/connectors/CloudConnector.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,51 @@ def launch(self, inf, radl, requested_radl, num_vm, auth_data):

raise NotImplementedError("Should have implemented this")

def launch_with_retry(self, inf, radl, requested_radl, num_vm, auth_data, max_num):
"""
Launch a set of VMs to the Cloud provider with a set of retries in case of failure
Args:
- inf(InfrastructureInfo): InfrastructureInfo object the VM is part of.
- radl(RADL): RADL document.
- num_vm(int): number of instances to deploy.
- auth_data(Authentication): Authentication data to access cloud provider.
- max_num: Number of retries.
Returns: a list of tuples with the format (success, vm).
- The first value is True if the operation finished successfully or false otherwise.
- The second value is a :py:class:`IM.VirtualMachine` of the launched VMs if the operation
finished successfully or a str with an error message otherwise.
"""
res_ok = []
res_err = {}
retries = 0
while len(res_ok) < num_vm and retries < max_num:
retries += 1
err_count = 0
for success, vm in self.launch(inf, radl, requested_radl, num_vm - len(res_ok), auth_data):
if success:
res_ok.append(vm)
else:
if err_count not in res_err:
res_err[err_count] = [vm]
else:
res_err[err_count].append(vm)
err_count += 1

res = []
for elem in res_ok:
res.append((True, elem))

for i in range(num_vm - len(res_ok)):
msgs = ""
for n, msg in enumerate(res_err[i]):
msgs += "Attempt %d: %s\n" % (n + 1, msg)
res.append((False, msgs))

return res

def finalize(self, vm, last, auth_data):
""" Terminates a VM
Expand Down
11 changes: 0 additions & 11 deletions IM/connectors/OpenNebula.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ def launch(self, inf, radl, requested_radl, num_vm, auth_data):
template = self.getONETemplate(radl, sgs, auth_data)
res = []
i = 0
all_failed = True
while i < num_vm:
func_res = server.one.vm.allocate(session_id, template)
if len(func_res) == 2:
Expand All @@ -454,20 +453,10 @@ def launch(self, inf, radl, requested_radl, num_vm, auth_data):
vm.info.systems[0].setValue('instance_id', str(res_id))
inf.add_vm(vm)
res.append((success, vm))
all_failed = False
else:
res.append((success, "ERROR: " + str(res_id)))
i += 1

if all_failed:
self.log_debug("All VMs failed, delete Security Groups.")
for sg in sgs.values():
self.log_debug("Delete Security Group: %d." % sg)
success, sg_id, _ = server.one.secgroup.delete(session_id, sg)
if success:
self.log_debug("Deleted.")
else:
self.log_debug("Error deleting SG: %s." % sg_id)
return res

def delete_security_groups(self, inf, auth_data, timeout=90, delay=10):
Expand Down
4 changes: 1 addition & 3 deletions test/unit/connectors/Azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from IM.VirtualMachine import VirtualMachine
from IM.InfrastructureInfo import InfrastructureInfo
from IM.connectors.Azure import AzureCloudConnector
from IM.config import Config
from mock import patch, MagicMock, call


Expand Down Expand Up @@ -200,8 +199,7 @@ def test_20_launch(self, save_data, credentials, network_client, compute_client,

cclient.virtual_machines.create_or_update.side_effect = self.create_vm

Config.MAX_VM_FAILS = 2
res = azure_cloud.launch(InfrastructureInfo(), radl, radl, 3, auth)
res = azure_cloud.launch_with_retry(InfrastructureInfo(), radl, radl, 3, auth, 2)
self.assertEqual(len(res), 3)
self.assertTrue(res[0][0])
self.assertTrue(res[1][0])
Expand Down
28 changes: 21 additions & 7 deletions test/unit/test_im_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ def test_inf_creation_addition_clouds(self):
" are asked to be deployed in different cloud providers",
str(ex.exception))

def test_inf_creation_errors(self):
@patch("IM.connectors.OCCI.OCCICloudConnector")
def test_inf_creation_errors(self, occi):
"""Create infrastructure """

radl = """"
Expand All @@ -237,7 +238,7 @@ def test_inf_creation_errors(self):
disk.0.os.credentials.username = 'ubuntu'
)
deploy front 1
deploy wn 2
deploy wn 1
"""

# this case must fail only with one error
Expand All @@ -256,16 +257,29 @@ def test_inf_creation_errors(self):
self.assertIn('VM 1:\nError launching the VMs of type wn to cloud ID ost of type OpenStack. '
'Error, no concrete system to deploy: wn in cloud: ost. '
'Check if a correct image is being used', res)
self.assertIn('VM 2:\nError launching the VMs of type wn to cloud ID ost of type OpenStack. '
'Error, no concrete system to deploy: wn in cloud: ost. '
'Check if a correct image is being used', res)

# this case must work OK
auth0 = Authentication([{'id': 'dummy', 'type': 'Dummy'},
{'type': 'InfrastructureManager', 'username': 'test',
'password': 'tests'}])
IM.CreateInfrastructure(radl, auth0)

radl = RADL()
radl.add(system("s0", [Feature("disk.0.image.url", "=", "mock0://linux.for.ev.er"),
Feature("disk.0.os.credentials.username", "=", "user"),
Feature("disk.0.os.credentials.password", "=", "pass")]))
radl.add(deploy("s0", 1))
cloud = type("MyMock0", (CloudConnector, object), {})
cloud.launch = Mock(return_value=[(False, "e1")])
self.register_cloudconnector("Mock", cloud)
auth0 = self.getAuth([0], [], [("Mock", 0)])
infID = IM.CreateInfrastructure(str(radl), auth0)
res = IM.GetInfrastructureState(infID, auth0)
self.assertEqual(res['state'], VirtualMachine.FAILED)
res = IM.GetInfrastructureContMsg(infID, auth0)
self.assertIn(("VM 0:\nError launching the VMs of type s0 to cloud ID cloud0 of type Mock. "
"Attempt 1: e1\nAttempt 2: e1\nAttempt 3: e1"), res)

def test_inf_auth(self):
"""Try to access not owned Infs."""

Expand Down Expand Up @@ -297,10 +311,10 @@ def test_inf_addresources_without_credentials(self):
vms = IM.AddResource(infId, str(radl), auth0)

self.assertEqual(vms, [0])

res = IM.GetInfrastructureState(infId, auth0)
self.assertEqual(res['state'], VirtualMachine.FAILED)

res = IM.GetVMContMsg(infId, 0, auth0)
self.assertEqual(res, ("Error launching the VMs of type s0 to cloud ID cloud0 of type Dummy."
" No username for deploy: s0\n"))
Expand Down

0 comments on commit ea858ba

Please sign in to comment.