use of software.amazon.awssdk.services.greengrassv2.model.DeploymentFailureHandlingPolicy.DO_NOTHING in project aws-greengrass-nucleus by aws-greengrass.
the class BaseE2ETestCase method draftAndCreateDeployment.
/**
 * Normalises the given deployment request, submits it through {@code greengrassClient} and records the
 * resulting deployment so that it can be cancelled later.
 *
 * <p>Normalisation consists of: replacing every component key with the value returned by
 * {@code getTestComponentNameInCloud}, falling back to {@code thingGroupArn} when no target ARN is set, and
 * supplying default deployment policies (120 second timeouts, NOTIFY_COMPONENTS, DO_NOTHING) when none are set.
 *
 * @param createDeploymentRequest the request as drafted by the test
 * @return the response returned by the create-deployment call
 */
@SuppressWarnings("PMD.LinguisticNaming")
protected CreateDeploymentResponse draftAndCreateDeployment(CreateDeploymentRequest createDeploymentRequest) {
    // update package name with random suffix to avoid conflict in cloud
    Map<String, ComponentDeploymentSpecification> cloudNamedComponents = new HashMap<>();
    for (Map.Entry<String, ComponentDeploymentSpecification> component
            : createDeploymentRequest.components().entrySet()) {
        cloudNamedComponents.put(getTestComponentNameInCloud(component.getKey()), component.getValue());
    }

    // Accumulate all adjustments on one builder and build the final request once.
    CreateDeploymentRequest.Builder requestBuilder =
            createDeploymentRequest.toBuilder().components(cloudNamedComponents);

    // Fill in defaults only for the fields the caller left unset.
    if (createDeploymentRequest.targetArn() == null) {
        requestBuilder.targetArn(thingGroupArn);
    }
    if (createDeploymentRequest.deploymentPolicies() == null) {
        DeploymentConfigurationValidationPolicy validationPolicy =
                DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build();
        DeploymentComponentUpdatePolicy updatePolicy =
                DeploymentComponentUpdatePolicy.builder().action(NOTIFY_COMPONENTS).timeoutInSeconds(120).build();
        requestBuilder.deploymentPolicies(DeploymentPolicies.builder()
                .configurationValidationPolicy(validationPolicy)
                .componentUpdatePolicy(updatePolicy)
                .failureHandlingPolicy(DO_NOTHING)
                .build());
    }

    CreateDeploymentRequest finalRequest = requestBuilder.build();
    logger.atInfo().kv("CreateDeploymentRequest", finalRequest).log();

    CreateDeploymentResponse response = greengrassClient.createDeployment(finalRequest);
    logger.atInfo().kv("CreateDeploymentResult", response).log();

    // Keep track of deployments to clean up
    createdDeployments.add(CancelDeploymentRequest.builder().deploymentId(response.deploymentId()).build());
    return response;
}
use of software.amazon.awssdk.services.greengrassv2.model.DeploymentFailureHandlingPolicy.DO_NOTHING in project aws-greengrass-nucleus by aws-greengrass.
the class DeploymentE2ETest method GIVEN_deployment_fails_due_to_service_broken_WHEN_deploy_fix_THEN_service_run_and_job_is_successful.
/**
 * Deploys a faulty CustomerApp (0.9.0), waits for the job to fail and the component to be BROKEN, then deploys
 * the fixed version (0.9.1) and expects the job to succeed with main FINISHED and CustomerApp RUNNING.
 *
 * @param context JUnit extension context used to whitelist the expected "broken state" exception
 * @throws Exception if a wait or cloud call fails
 */
@Timeout(value = 10, unit = TimeUnit.MINUTES)
@Test
void GIVEN_deployment_fails_due_to_service_broken_WHEN_deploy_fix_THEN_service_run_and_job_is_successful(ExtensionContext context) throws Exception {
    String expectedFailureMessage =
            "Service " + getTestComponentNameInCloud("CustomerApp") + " in broken state after deployment";
    ignoreExceptionUltimateCauseWithMessage(context, expectedFailureMessage);

    // Create first Job Doc with a faulty service (CustomerApp-0.9.0)
    DeploymentConfigurationValidationPolicy validationPolicy =
            DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build();
    DeploymentComponentUpdatePolicy updatePolicy =
            DeploymentComponentUpdatePolicy.builder().action(SKIP_NOTIFY_COMPONENTS).timeoutInSeconds(120).build();
    DeploymentPolicies policies = DeploymentPolicies.builder()
            .configurationValidationPolicy(validationPolicy)
            .componentUpdatePolicy(updatePolicy)
            .failureHandlingPolicy(DO_NOTHING)
            .build();
    ComponentDeploymentSpecification faultyVersion =
            ComponentDeploymentSpecification.builder().componentVersion("0.9.0").build();
    CreateDeploymentRequest faultyRequest = CreateDeploymentRequest.builder()
            .deploymentPolicies(policies)
            .components(Utils.immutableMap("CustomerApp", faultyVersion))
            .build();
    CreateDeploymentResponse faultyDeployment = draftAndCreateDeployment(faultyRequest);

    // Wait for deployment job to fail after three retries of starting CustomerApp
    IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, faultyDeployment.iotJobId(),
            thingInfo.getThingName(), Duration.ofMinutes(7),
            status -> status.equals(JobExecutionStatus.FAILED));

    // CustomerApp should be in BROKEN state
    assertEquals(State.BROKEN, getCloudDeployedComponent("CustomerApp").getState());

    // Create another job with a fix to the faulty service (CustomerApp-0.9.1).
    ComponentDeploymentSpecification fixedVersion =
            ComponentDeploymentSpecification.builder().componentVersion("0.9.1").build();
    CreateDeploymentRequest fixRequest = CreateDeploymentRequest.builder()
            .components(Utils.immutableMap("CustomerApp", fixedVersion))
            .build();
    CreateDeploymentResponse fixDeployment = draftAndCreateDeployment(fixRequest);
    IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, fixDeployment.iotJobId(),
            thingInfo.getThingName(), Duration.ofMinutes(5),
            status -> status.equals(JobExecutionStatus.SUCCEEDED));

    // Ensure that main is FINISHED and CustomerApp is RUNNING.
    assertThat(kernel.getMain()::getState, eventuallyEval(is(State.FINISHED)));
    assertEquals(State.RUNNING, getCloudDeployedComponent("CustomerApp").getState());
}
use of software.amazon.awssdk.services.greengrassv2.model.DeploymentFailureHandlingPolicy.DO_NOTHING in project aws-greengrass-nucleus by aws-greengrass.
the class DeploymentE2ETest method GIVEN_deployment_received_WHEN_skip_update_policy_check_THEN_update_policy_check_skipped.
/**
 * Verifies that a deployment configured with SKIP_NOTIFY_COMPONENTS logs that the update policy check is
 * skipped and then completes, updating NonDisruptableService from 1.0.0 to 1.0.1.
 *
 * <p>The global log listener is always removed in a {@code finally} block so that a failed wait or assertion
 * does not leave it registered for subsequent tests.
 *
 * @throws Exception if a wait or cloud call fails
 */
@Timeout(value = 10, unit = TimeUnit.MINUTES)
@Test
void GIVEN_deployment_received_WHEN_skip_update_policy_check_THEN_update_policy_check_skipped() throws Exception {
    // GIVEN
    // First Deployment to have a service running in Kernel which has a update policy that always returns
    // false, i.e. keeps waiting forever
    CreateDeploymentRequest createDeploymentRequest1 = CreateDeploymentRequest.builder()
            .components(Utils.immutableMap("NonDisruptableService",
                    ComponentDeploymentSpecification.builder().componentVersion("1.0.0").build()))
            .build();
    CreateDeploymentResponse result1 = draftAndCreateDeployment(createDeploymentRequest1);
    IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result1.iotJobId(), thingInfo.getThingName(),
            Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.SUCCEEDED));

    CountDownLatch updatePolicyCheckSkipped = new CountDownLatch(1);
    Consumer<GreengrassLogMessage> logListener = m -> {
        // Null-check the message like the other log listeners in this suite do, so a log event without a
        // message cannot throw from inside the listener.
        if (m.getMessage() != null
                && m.getMessage().contains("Deployment is configured to skip update policy check")) {
            updatePolicyCheckSkipped.countDown();
        }
    };
    Slf4jLogAdapter.addGlobalListener(logListener);

    CreateDeploymentResponse result2;
    try {
        // WHEN
        // Second deployment to update the service with SKIP_NOTIFY_COMPONENTS
        CreateDeploymentRequest createDeploymentRequest2 = CreateDeploymentRequest.builder()
                .deploymentPolicies(DeploymentPolicies.builder()
                        .configurationValidationPolicy(
                                DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build())
                        .failureHandlingPolicy(DO_NOTHING)
                        .componentUpdatePolicy(DeploymentComponentUpdatePolicy.builder()
                                .action(SKIP_NOTIFY_COMPONENTS).timeoutInSeconds(120).build())
                        .build())
                .components(Utils.immutableMap("NonDisruptableService",
                        ComponentDeploymentSpecification.builder().componentVersion("1.0.1").build()))
                .build();
        result2 = draftAndCreateDeployment(createDeploymentRequest2);
        IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result2.iotJobId(), thingInfo.getThingName(),
                Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.IN_PROGRESS));

        // THEN
        assertTrue(updatePolicyCheckSkipped.await(60, TimeUnit.SECONDS));
    } finally {
        // Always unregister, even when the wait or the assertion above fails.
        Slf4jLogAdapter.removeGlobalListener(logListener);
    }

    IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result2.iotJobId(), thingInfo.getThingName(),
            Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.SUCCEEDED));

    // Ensure that main is finished, which is its terminal state, so this means that all updates ought to be done
    assertThat(kernel.getMain()::getState, eventuallyEval(is(State.FINISHED)));
    assertThat(getCloudDeployedComponent("NonDisruptableService")::getState, eventuallyEval(is(State.RUNNING)));
    assertEquals("1.0.1", getCloudDeployedComponent("NonDisruptableService").getConfig().find("version").getOnce());
}
use of software.amazon.awssdk.services.greengrassv2.model.DeploymentFailureHandlingPolicy.DO_NOTHING in project aws-greengrass-nucleus by aws-greengrass.
the class DeploymentE2ETest method GIVEN_deployment_in_progress_with_more_jobs_queued_in_cloud_WHEN_cancel_event_received_and_kernel_is_waiting_for_safe_time_THEN_deployment_should_be_canceled.
/**
 * Verifies that cancelling the IoT job of an in-progress deployment, while a further deployment is already
 * created in the cloud, cancels the in-progress deployment and lets the next job move to IN_PROGRESS.
 *
 * <p>Flow: deploy NonDisruptableService 1.0.0; subscribe to component updates over IPC and answer every
 * pre-update event with a 60 second deferral; create a second and a third deployment to 1.0.1; cancel the
 * second deployment's job once an update action has been registered; finally assert that the third job is
 * IN_PROGRESS, main is FINISHED, the component is RUNNING and its configured version is still 1.0.0.
 *
 * @throws Exception if a wait, IPC call or cloud call fails
 */
@Timeout(value = 10, unit = TimeUnit.MINUTES)
@Test
void GIVEN_deployment_in_progress_with_more_jobs_queued_in_cloud_WHEN_cancel_event_received_and_kernel_is_waiting_for_safe_time_THEN_deployment_should_be_canceled() throws Exception {
// First Deployment to have a service running in Kernel which has a safety check that always returns
// false, i.e. keeps waiting forever
CreateDeploymentRequest createDeploymentRequest1 = CreateDeploymentRequest.builder().components(Utils.immutableMap("NonDisruptableService", ComponentDeploymentSpecification.builder().componentVersion("1.0.0").build())).build();
CreateDeploymentResponse result1 = draftAndCreateDeployment(createDeploymentRequest1);
IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result1.iotJobId(), thingInfo.getThingName(), Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.SUCCEEDED));
// Declared outside the try block so the finally clause can unregister it; stays null until it is registered.
Consumer<GreengrassLogMessage> logListener = null;
// Open an IPC connection for the deployed component (its name carries the per-test suffix).
try (EventStreamRPCConnection connection = IPCTestUtils.getEventStreamRpcConnection(kernel, "NonDisruptableService" + testComponentSuffix)) {
GreengrassCoreIPCClient ipcClient = new GreengrassCoreIPCClient(connection);
ipcClient.subscribeToComponentUpdates(new SubscribeToComponentUpdatesRequest(), Optional.of(new StreamResponseHandler<ComponentUpdatePolicyEvents>() {
@Override
public void onStreamEvent(ComponentUpdatePolicyEvents streamEvent) {
// Respond to each pre-update event by asking for a recheck after 60 seconds.
if (streamEvent.getPreUpdateEvent() != null) {
logger.atInfo().log("Got pre component update event");
DeferComponentUpdateRequest deferComponentUpdateRequest = new DeferComponentUpdateRequest();
deferComponentUpdateRequest.setRecheckAfterMs(TimeUnit.SECONDS.toMillis(60));
deferComponentUpdateRequest.setMessage("NonDisruptableService");
logger.atInfo().log("Sending defer request");
// Cannot wait inside a callback
ipcClient.deferComponentUpdate(deferComponentUpdateRequest, Optional.empty());
}
}
@Override
public boolean onStreamError(Throwable error) {
logger.atError().setCause(error).log("Caught stream error while subscribing for component update");
// NOTE(review): presumably false keeps the stream open after an error - confirm against StreamResponseHandler docs.
return false;
}
@Override
public void onStreamClosed() {
}
}));
// Latches released by the log listener below: one when an update action is registered, one when the
// cancellation is logged.
CountDownLatch updateRegistered = new CountDownLatch(1);
CountDownLatch deploymentCancelled = new CountDownLatch(1);
logListener = m -> {
if ("register-service-update-action".equals(m.getEventType())) {
updateRegistered.countDown();
}
if (m.getMessage() != null && m.getMessage().contains("Deployment was cancelled")) {
deploymentCancelled.countDown();
}
};
// Registered before the second deployment is created so none of its log events can be missed.
Slf4jLogAdapter.addGlobalListener(logListener);
// Second deployment to update the service which is currently running an important task so deployment should
// keep waiting for a safe time to update
CreateDeploymentRequest createDeploymentRequest2 = CreateDeploymentRequest.builder().deploymentPolicies(DeploymentPolicies.builder().configurationValidationPolicy(DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build()).failureHandlingPolicy(DO_NOTHING).componentUpdatePolicy(DeploymentComponentUpdatePolicy.builder().action(NOTIFY_COMPONENTS).timeoutInSeconds(120).build()).build()).components(Utils.immutableMap("NonDisruptableService", ComponentDeploymentSpecification.builder().componentVersion("1.0.1").build())).build();
CreateDeploymentResponse result2 = draftAndCreateDeployment(createDeploymentRequest2);
IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result2.iotJobId(), thingInfo.getThingName(), Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.IN_PROGRESS));
// Create one more deployment so that it's queued in cloud
CreateDeploymentRequest createDeploymentRequest3 = CreateDeploymentRequest.builder().deploymentPolicies(DeploymentPolicies.builder().configurationValidationPolicy(DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build()).failureHandlingPolicy(DO_NOTHING).componentUpdatePolicy(DeploymentComponentUpdatePolicy.builder().action(NOTIFY_COMPONENTS).timeoutInSeconds(120).build()).build()).components(Utils.immutableMap("NonDisruptableService", ComponentDeploymentSpecification.builder().componentVersion("1.0.1").build())).build();
CreateDeploymentResponse result3 = draftAndCreateDeployment(createDeploymentRequest3);
// Wait for the second deployment to start waiting for safe time to update and
// then cancel its corresponding job from cloud
assertTrue(updateRegistered.await(60, TimeUnit.SECONDS));
UpdateSystemPolicyService updateSystemPolicyService = kernel.getContext().get(UpdateSystemPolicyService.class);
assertThat("The UpdateSystemService should have one pending action.", updateSystemPolicyService.getPendingActions(), IsCollectionWithSize.hasSize(1));
// Remember the pending action so it can be compared with whatever is pending after the cancellation
String pendingAction = updateSystemPolicyService.getPendingActions().iterator().next();
// GG_NEEDS_REVIEW: TODO : Call Fleet configuration service's cancel API when ready instead of calling IoT Jobs API
IotJobsUtils.cancelJob(iotClient, result2.iotJobId());
// Wait for indication that cancellation has gone through
assertTrue(deploymentCancelled.await(240, TimeUnit.SECONDS));
// The third deployment may have reached the device by now, so a single pending action is acceptable as long
// as it differs from the one recorded before the cancellation; more than one is a failure.
Set<String> pendingActions = updateSystemPolicyService.getPendingActions();
if (pendingActions.size() == 1) {
String newPendingAction = pendingActions.iterator().next();
assertNotEquals(pendingAction, newPendingAction, "The UpdateSystemService's one pending action should be be replaced.");
} else if (pendingActions.size() > 1) {
fail("Deployment not cancelled, pending actions: " + updateSystemPolicyService.getPendingActions());
}
// Now that we've verified that the job got cancelled, let's verify that the next job was picked up
// and put into IN_PROGRESS state
IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, result3.iotJobId(), thingInfo.getThingName(), Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.IN_PROGRESS));
// Ensure that main is finished, which is its terminal state, so this means that all updates ought to be done
assertThat(kernel.getMain()::getState, eventuallyEval(is(State.FINISHED)));
assertThat(getCloudDeployedComponent("NonDisruptableService")::getState, eventuallyEval(is(State.RUNNING)));
// The configured version is expected to still be the originally deployed 1.0.0
assertEquals("1.0.0", getCloudDeployedComponent("NonDisruptableService").getConfig().find("version").getOnce());
} finally {
// Unregister the global listener on every exit path, if it was registered.
if (logListener != null) {
Slf4jLogAdapter.removeGlobalListener(logListener);
}
}
}
use of software.amazon.awssdk.services.greengrassv2.model.DeploymentFailureHandlingPolicy.DO_NOTHING in project aws-greengrass-nucleus by aws-greengrass.
the class DeploymentE2ETest method GIVEN_some_running_services_WHEN_cancel_event_received_and_kernel_is_waiting_for_disruptable_time_THEN_deployment_should_be_canceled.
/**
 * Verifies that cancelling the IoT job of a deployment that is waiting for a disruptable time cancels the
 * deployment on the device and leaves the previously deployed component in place.
 *
 * <p>Flow: deploy NonDisruptableService 1.0.0; subscribe to component updates over IPC and answer every
 * pre-update event with a 60 second deferral; create a deployment to 1.0.1; once an update action has been
 * registered, cancel that deployment's job; then assert that no pending action remains, that a post-update
 * event is delivered, and that the component is RUNNING with configured version 1.0.0.
 *
 * <p>The log listener is registered before the second deployment is created so that the
 * {@code register-service-update-action} event cannot be logged before the listener exists.
 *
 * @throws Exception if a wait, IPC call or cloud call fails
 */
@Timeout(value = 10, unit = TimeUnit.MINUTES)
@Test
void GIVEN_some_running_services_WHEN_cancel_event_received_and_kernel_is_waiting_for_disruptable_time_THEN_deployment_should_be_canceled() throws Exception {
    // First Deployment to have a service running in Kernel which has a update policy check that always returns
    // false, i.e. keeps waiting forever
    CreateDeploymentRequest createDeploymentRequest1 = CreateDeploymentRequest.builder()
            .components(Utils.immutableMap("NonDisruptableService",
                    ComponentDeploymentSpecification.builder().componentVersion("1.0.0").build()))
            .build();
    CreateDeploymentResponse createDeploymentResult1 = draftAndCreateDeployment(createDeploymentRequest1);
    IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, createDeploymentResult1.iotJobId(),
            thingInfo.getThingName(), Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.SUCCEEDED));

    CountDownLatch postUpdateEventReceived = new CountDownLatch(1);
    // Declared outside the try block so the finally clause can unregister it.
    Consumer<GreengrassLogMessage> logListener = null;
    try (EventStreamRPCConnection connection = IPCTestUtils.getEventStreamRpcConnection(kernel,
            "NonDisruptableService" + testComponentSuffix)) {
        GreengrassCoreIPCClient ipcClient = new GreengrassCoreIPCClient(connection);
        ipcClient.subscribeToComponentUpdates(new SubscribeToComponentUpdatesRequest(),
                Optional.of(new StreamResponseHandler<ComponentUpdatePolicyEvents>() {
                    @Override
                    public void onStreamEvent(ComponentUpdatePolicyEvents streamEvent) {
                        // Respond to each pre-update event by asking for a recheck after 60 seconds.
                        if (streamEvent.getPreUpdateEvent() != null) {
                            DeferComponentUpdateRequest deferComponentUpdateRequest =
                                    new DeferComponentUpdateRequest();
                            deferComponentUpdateRequest.setRecheckAfterMs(TimeUnit.SECONDS.toMillis(60));
                            deferComponentUpdateRequest.setMessage("NonDisruptableService");
                            // Cannot wait for response inside a callback
                            ipcClient.deferComponentUpdate(deferComponentUpdateRequest, Optional.empty());
                        }
                        if (streamEvent.getPostUpdateEvent() != null) {
                            postUpdateEventReceived.countDown();
                        }
                    }

                    @Override
                    public boolean onStreamError(Throwable error) {
                        logger.atError().setCause(error)
                                .log("Caught stream error while subscribing for component update");
                        return false;
                    }

                    @Override
                    public void onStreamClosed() {
                    }
                }));

        // Register the log listener BEFORE creating the second deployment, so the update-registration and
        // cancellation log events are observed no matter how quickly the deployment reaches the device.
        CountDownLatch updateRegistered = new CountDownLatch(1);
        CountDownLatch deploymentCancelled = new CountDownLatch(1);
        logListener = m -> {
            if ("register-service-update-action".equals(m.getEventType())) {
                updateRegistered.countDown();
            }
            if (m.getMessage() != null && m.getMessage().contains("Deployment was cancelled")) {
                deploymentCancelled.countDown();
            }
        };
        Slf4jLogAdapter.addGlobalListener(logListener);

        // Second deployment to update the service which is currently running an important task so deployment
        // should wait for a disruptable time to update
        CreateDeploymentRequest createDeploymentRequest2 = CreateDeploymentRequest.builder()
                .deploymentPolicies(DeploymentPolicies.builder()
                        .failureHandlingPolicy(DO_NOTHING)
                        .configurationValidationPolicy(
                                DeploymentConfigurationValidationPolicy.builder().timeoutInSeconds(120).build())
                        .componentUpdatePolicy(DeploymentComponentUpdatePolicy.builder()
                                .action(NOTIFY_COMPONENTS).timeoutInSeconds(120).build())
                        .build())
                .components(Utils.immutableMap("NonDisruptableService",
                        ComponentDeploymentSpecification.builder().componentVersion("1.0.1").build()))
                .build();
        CreateDeploymentResponse createDeploymentResult2 = draftAndCreateDeployment(createDeploymentRequest2);

        IotJobsUtils.waitForJobExecutionStatusToSatisfy(iotClient, createDeploymentResult2.iotJobId(),
                thingInfo.getThingName(), Duration.ofMinutes(3), s -> s.equals(JobExecutionStatus.IN_PROGRESS));

        // Wait for the second deployment to start waiting for safe time to update and
        // then cancel its corresponding job from cloud
        assertTrue(updateRegistered.await(60, TimeUnit.SECONDS));
        assertThat("The UpdateSystemService should have one pending action.",
                kernel.getContext().get(UpdateSystemPolicyService.class).getPendingActions(),
                IsCollectionWithSize.hasSize(1));
        // GG_NEEDS_REVIEW: TODO : Call Fleet configuration service's cancel API when ready instead of calling IoT Jobs API
        IotJobsUtils.cancelJob(iotClient, createDeploymentResult2.iotJobId());

        // Wait for indication that cancellation has gone through
        assertTrue(deploymentCancelled.await(60, TimeUnit.SECONDS));
        assertThat("The UpdateSystemService's one pending action should be be removed.",
                kernel.getContext().get(UpdateSystemPolicyService.class).getPendingActions(),
                IsCollectionWithSize.hasSize(0));

        // Component should be told to resume its work since the change it has been waiting for is cancelled
        assertTrue(postUpdateEventReceived.await(60, TimeUnit.SECONDS));

        // Ensure that main is finished, which is its terminal state, so this means that all updates ought to be done
        assertThat(kernel.getMain()::getState, eventuallyEval(is(State.FINISHED)));
        assertThat(getCloudDeployedComponent("NonDisruptableService")::getState, eventuallyEval(is(State.RUNNING)));
        assertEquals("1.0.0",
                getCloudDeployedComponent("NonDisruptableService").getConfig().find("version").getOnce());
    } finally {
        // Unregister the global listener on every exit path, if it was registered.
        if (logListener != null) {
            Slf4jLogAdapter.removeGlobalListener(logListener);
        }
    }
}
Aggregations