use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class RestartContainerResolver method resolve.
/**
 * Restarts every container that hosts an instance which reported back-pressure between the
 * previous and the current checkpoint, then broadcasts a {@link ContainerRestart} event.
 *
 * <p>The container index is taken from the text between the first and second underscore of
 * each instance id (presumably ids look like {@code container_<index>_<component>_<task>};
 * confirm against the component that produces them).
 *
 * @param diagnosis diagnoses of the current cycle; not consulted by this resolver
 * @return an empty list when no instance back-pressure symptom is found in the checkpoint
 *     window, otherwise a singleton list holding the broadcast {@link ContainerRestart}
 * @throws StringIndexOutOfBoundsException if an instance id has fewer than two underscores
 * @throws NumberFormatException if the text between the underscores is not an integer
 */
@Override
public Collection<Action> resolve(Collection<Diagnosis> diagnosis) {
  publishingMetrics.executeResolver(RESTART_CONTAINER_RESOLVER);

  // find all back pressure measurements reported in this execution cycle
  Instant current = context.checkpoint();
  Instant previous = context.previousCheckpoint();
  SymptomsTable bpSymptoms = context.symptoms()
      .type(SYMPTOM_INSTANCE_BACK_PRESSURE.text())
      .between(previous, current);

  if (bpSymptoms.size() == 0) {
    LOG.fine("No back-pressure measurements found, ending as there's nothing to fix");
    // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
    return Collections.emptyList();
  }

  // A set, so an instance named by several symptoms is only handled once.
  Collection<String> allBpInstances = new HashSet<>();
  bpSymptoms.get().forEach(symptom -> allBpInstances.addAll(symptom.assignments()));
  LOG.info(String.format("%d instances caused back-pressure", allBpInstances.size()));

  // Several instances may share a container; the set restarts each container once.
  Collection<String> stmgrIds = new HashSet<>();
  for (String instanceId : allBpInstances) {
    LOG.info("Id of instance causing back-pressure: " + instanceId);
    int fromIndex = instanceId.indexOf('_') + 1;
    int toIndex = instanceId.indexOf('_', fromIndex);
    stmgrIds.add(instanceId.substring(fromIndex, toIndex));
  }

  for (String stmgrId : stmgrIds) {
    LOG.info("Restarting container: " + stmgrId);
    boolean restarted = schedulerClient.restartTopology(
        RestartTopologyRequest.newBuilder()
            .setContainerIndex(Integer.parseInt(stmgrId))
            .setTopologyName(topologyName)
            .build());
    LOG.info("Restarted container result: " + restarted);
    // The metric is incremented per attempt, regardless of the scheduler's answer.
    publishingMetrics.executeIncr("RestartContainer");
  }

  LOG.info("Broadcasting container restart event");
  ContainerRestart action = new ContainerRestart(current, stmgrIds);
  eventManager.onEvent(action);
  return Collections.singletonList(action);
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class ProcessingRateSkewDetectorTest method testReturnsMultipleComponents.
/**
 * Feeds the detector execute-count measurements for three components, two instances each,
 * and checks the number of symptoms produced overall and per instance.
 *
 * <p>With the skew ratio mocked to 2.5, "bolt" and "bolt2" have a 5x spread (1000 vs 200)
 * while "bolt3" has a 2x spread (1000 vs 500); presumably only the first two are meant to
 * be flagged, which matches the instances asserted below.
 */
@Test
public void testReturnsMultipleComponents() {
  HealthPolicyConfig policyConfig = mock(HealthPolicyConfig.class);
  when(policyConfig.getConfig(CONF_SKEW_RATIO, 1.5)).thenReturn(2.5);

  PoliciesExecutor.ExecutionContext executionContext =
      mock(PoliciesExecutor.ExecutionContext.class);
  when(executionContext.checkpoint()).thenReturn(Instant.now());

  // All samples share one timestamp and one metric type.
  Instant sampleTime = Instant.ofEpochSecond(1497892222);
  String execCount = METRIC_EXE_COUNT.text();

  Collection<Measurement> samples = new ArrayList<>();
  samples.add(new Measurement("bolt", "i1", execCount, sampleTime, 1000));
  samples.add(new Measurement("bolt", "i2", execCount, sampleTime, 200.0));
  samples.add(new Measurement("bolt2", "i3", execCount, sampleTime, 1000));
  samples.add(new Measurement("bolt2", "i4", execCount, sampleTime, 200.0));
  samples.add(new Measurement("bolt3", "i5", execCount, sampleTime, 1000));
  samples.add(new Measurement("bolt3", "i6", execCount, sampleTime, 500.0));

  ProcessingRateSkewDetector skewDetector = new ProcessingRateSkewDetector(policyConfig);
  skewDetector.initialize(executionContext);

  Collection<Symptom> detected = skewDetector.detect(samples);
  assertEquals(6, detected.size());

  SymptomsTable table = SymptomsTable.of(detected);
  String positiveType = "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;
  String negativeType = "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;

  SymptomsTable positives = table.type(positiveType);
  SymptomsTable negatives = table.type(negativeType);
  assertEquals(2, positives.size());
  assertEquals(2, negatives.size());

  // The high-count instances carry the positive symptom, the low-count ones the negative.
  assertEquals(1, positives.assignment("i1").size());
  assertEquals(1, positives.assignment("i3").size());
  assertEquals(1, negatives.assignment("i2").size());
  assertEquals(1, negatives.assignment("i4").size());
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class UnderProvisioningDiagnoser method diagnose.
/**
 * Reports under-provisioning for a component that shows back-pressure but has neither a
 * wait-queue-size skew symptom nor a processing-rate skew symptom assigned to it.
 *
 * @param symptoms symptoms to inspect
 * @return a mutable list that is empty when there is no back-pressure symptom or when a
 *     skew symptom exists for the back-pressure component, and otherwise holds a single
 *     under-provisioning diagnosis for that component
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
  final Collection<Diagnosis> result = new ArrayList<>();
  final SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
  final SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

  final int backPressureCount = backPressure.size();
  if (backPressureCount > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  }
  if (backPressureCount == 0) {
    return result;
  }

  // Exactly one back-pressure symptom: take the first component it is assigned to.
  final String component = backPressure.first().assignments().iterator().next();

  final SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
  final SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());
  final boolean hasSkew = queueSkew.assignment(component).size() != 0
      || rateSkew.assignment(component).size() != 0;

  if (!hasSkew) {
    LOG.info(String.format(
        "UNDER_PROVISIONING: %s back-pressure and similar processing rates and wait queue sizes",
        component));
    result.add(new Diagnosis(
        DIAGNOSIS_UNDER_PROVISIONING.text(),
        context.checkpoint(),
        Collections.singletonList(component)));
  }
  return result;
}
Aggregations