Use of org.apache.heron.healthmgr.common.HealthManagerEvents.ContainerRestart in the Heron project by Twitter.
The method resolve of the class RestartContainerResolver.
/**
 * Restarts every container that hosts an instance which reported back-pressure
 * between the previous and the current checkpoint, then broadcasts a
 * {@code ContainerRestart} event listing the affected container ids.
 *
 * <p>The container id is taken from the text between the first and the second
 * underscore of each instance id. Instance ids that do not have that shape, or
 * whose extracted segment is not an integer, are logged and skipped rather than
 * aborting the whole resolver run.
 *
 * @param diagnosis diagnoses produced for this cycle; not read by this method
 * @return an empty collection when no back-pressure symptoms were recorded in
 *     the window, otherwise a single-element collection holding the broadcast
 *     {@code ContainerRestart} action
 */
@Override
public Collection<Action> resolve(Collection<Diagnosis> diagnosis) {
  publishingMetrics.executeResolver(RESTART_CONTAINER_RESOLVER);

  // find all back pressure measurements reported in this execution cycle
  Instant current = context.checkpoint();
  Instant previous = context.previousCheckpoint();
  SymptomsTable bpSymptoms = context.symptoms()
      .type(SYMPTOM_INSTANCE_BACK_PRESSURE.text())
      .between(previous, current);

  if (bpSymptoms.size() == 0) {
    LOG.fine("No back-pressure measurements found, ending as there's nothing to fix");
    // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
    return Collections.emptyList();
  }

  Collection<String> allBpInstances = new HashSet<>();
  bpSymptoms.get().forEach(symptom -> allBpInstances.addAll(symptom.assignments()));
  LOG.info(String.format("%d instances caused back-pressure", allBpInstances.size()));

  // A set, so that a container hosting several offending instances is restarted once.
  Collection<String> stmgrIds = new HashSet<>();
  for (String instanceId : allBpInstances) {
    LOG.info("Id of instance causing back-pressure: " + instanceId);

    int fromIndex = instanceId.indexOf('_') + 1;
    int toIndex = instanceId.indexOf('_', fromIndex);
    // fromIndex == 0 means no underscore at all; toIndex < 0 means no second one.
    // Either way substring(fromIndex, toIndex) would throw, so skip the id instead.
    if (fromIndex == 0 || toIndex < 0) {
      LOG.warning("Skipping instance with unexpected id format: " + instanceId);
      continue;
    }

    String stmgrId = instanceId.substring(fromIndex, toIndex);
    try {
      // Validate up front so the restart loop below cannot fail half-way through.
      Integer.parseInt(stmgrId);
    } catch (NumberFormatException e) {
      LOG.warning("Skipping instance with non-numeric container id: " + instanceId);
      continue;
    }
    stmgrIds.add(stmgrId);
  }

  for (String stmgrId : stmgrIds) {
    LOG.info("Restarting container: " + stmgrId);
    boolean b = schedulerClient.restartTopology(
        RestartTopologyRequest.newBuilder()
            .setContainerIndex(Integer.parseInt(stmgrId))
            .setTopologyName(topologyName)
            .build());
    LOG.info("Restarted container result: " + b);
    // NOTE(review): the metric is incremented regardless of the restart result,
    // as in the original code - confirm this is intended.
    publishingMetrics.executeIncr("RestartContainer");
  }

  LOG.info("Broadcasting container restart event");
  ContainerRestart action = new ContainerRestart(current, stmgrIds);
  eventManager.onEvent(action);
  return Collections.singletonList(action);
}
Aggregations