use of com.microsoft.dhalion.diagnoser.Diagnosis in project incubator-heron by apache.
the class DataSkewDiagnoserTest method diagnosisNoDataSkewLowRate.
/**
 * Feeds the data-skew diagnoser a back-pressure symptom list plus an
 * execution-count symptom and a wait-queue-disparity symptom, and expects
 * no diagnosis to be produced.
 */
@Test
public void diagnosisNoDataSkewLowRate() {
  // Arrange: same three fixtures, built through the shared TestUtils factories.
  List<Symptom> input = TestUtils.createBpSymptomList(123, 0, 0);
  input.add(TestUtils.createExeCountSymptom(100, 2000, 2000));
  input.add(TestUtils.createWaitQueueDisparitySymptom(10000, 500, 500));

  // Act + assert: the diagnoser must report nothing for this input.
  DataSkewDiagnoser diagnoser = new DataSkewDiagnoser();
  assertNull(diagnoser.diagnose(input));
}
use of com.microsoft.dhalion.diagnoser.Diagnosis in project incubator-heron by apache.
the class DataSkewDiagnoserTest method diagnosis1DataSkewInstance.
/**
 * Feeds the data-skew diagnoser a back-pressure symptom list plus an
 * execution-count symptom and a wait-queue-disparity symptom, and expects a
 * data-skew diagnosis carrying exactly one symptom whose component reports a
 * back-pressure sum of 123 for instance {@code container_1_bolt_0}.
 */
@Test
public void diagnosis1DataSkewInstance() {
  // Arrange
  List<Symptom> input = TestUtils.createBpSymptomList(123, 0, 0);
  input.add(TestUtils.createExeCountSymptom(5000, 2000, 2000));
  input.add(TestUtils.createWaitQueueDisparitySymptom(10000, 500, 500));

  // Act
  DataSkewDiagnoser diagnoser = new DataSkewDiagnoser();
  Diagnosis diagnosis = diagnoser.diagnose(input);

  // Assert: a diagnosis exists, is named as data skew, and holds one symptom.
  assertNotNull(diagnosis);
  assertEquals(DIAGNOSIS_DATA_SKEW.text(), diagnosis.getName());
  assertEquals(1, diagnosis.getSymptoms().size());

  // The single symptom must still expose the original back-pressure reading.
  Symptom reported = diagnosis.getSymptoms().values().iterator().next();
  int backPressure = reported.getComponent()
      .getMetricValueSum("container_1_bolt_0", METRIC_BACK_PRESSURE.text())
      .intValue();
  assertEquals(123, backPressure);
}
use of com.microsoft.dhalion.diagnoser.Diagnosis in project incubator-heron by apache.
the class UnderProvisioningDiagnoserTest method diagnosisFailsNotSimilarProcessingRates.
/**
 * Feeds the under-provisioning diagnoser a back-pressure symptom list plus
 * an execution-count symptom and expects no diagnosis to be produced.
 */
@Test
public void diagnosisFailsNotSimilarProcessingRates() {
  // Arrange
  List<Symptom> input = TestUtils.createBpSymptomList(123, 0, 0);
  input.add(TestUtils.createExeCountSymptom(100, 500, 500));

  // Act + assert
  UnderProvisioningDiagnoser diagnoser = new UnderProvisioningDiagnoser();
  assertNull(diagnoser.diagnose(input));
}
use of com.microsoft.dhalion.diagnoser.Diagnosis in project incubator-heron by apache.
the class SlowInstanceDiagnoser method diagnose.
/**
 * Produces a slow-instance diagnosis from the given symptoms.
 *
 * <p>A diagnosis is only attempted when there is a back-pressure symptom, at
 * least one component with wait-queue disparity, and no component with
 * processing-rate skew. The back-pressure component must also appear among
 * the wait-queue-disparity components. A bolt with back pressure is then
 * flagged when the maximum buffer size across the component is less than
 * twice that bolt's own buffer size.
 *
 * @param symptoms symptoms to inspect
 * @return a diagnosis wrapping a slow-instance symptom over the merged
 *     metrics, or {@code null} when the conditions above are not met
 * @throws IllegalStateException if more than one back-pressure symptom is
 *     present (and the other preconditions hold)
 */
@Override
public Diagnosis diagnose(List<Symptom> symptoms) {
  List<Symptom> backPressure = getBackPressureSymptoms(symptoms);
  Map<String, ComponentMetrics> rateSkewByComponent = getProcessingRateSkewComponents(symptoms);
  Map<String, ComponentMetrics> waitQByComponent = getWaitQDisparityComponents(symptoms);

  // Proceed only with back pressure, wait-queue disparity, and no processing-rate skew.
  boolean preconditionsMet =
      !backPressure.isEmpty() && !waitQByComponent.isEmpty() && rateSkewByComponent.isEmpty();
  if (!preconditionsMet) {
    return null;
  }

  if (backPressure.size() > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  }

  // The component under back pressure must also show wait-queue disparity.
  ComponentMetrics bpComponent = backPressure.iterator().next().getComponent();
  ComponentMetrics waitQComponent = waitQByComponent.get(bpComponent.getName());
  if (waitQComponent == null) {
    // Back pressure without wait-queue disparity on the same component: no slow instance.
    return null;
  }

  ComponentMetrics merged = ComponentMetrics.merge(bpComponent, waitQComponent);
  ComponentMetricsHelper helper = new ComponentMetricsHelper(merged);
  helper.computeBpStats();
  MetricsStats bufferRange = helper.computeMinMaxStats(METRIC_BUFFER_SIZE);

  Symptom slowInstance = null;
  for (InstanceMetrics bolt : helper.getBoltsWithBackpressure()) {
    double boltBuffer = bolt.getMetricValueSum(METRIC_BUFFER_SIZE.text());
    double boltBackPressure = bolt.getMetricValueSum(METRIC_BACK_PRESSURE.text());

    // Skip bolts whose buffer is at most half of the component-wide maximum.
    if (!(bufferRange.getMetricMax() < boltBuffer * 2)) {
      continue;
    }

    String message = String.format(
        "SLOW: %s back-pressure(%s) and high buffer size: %s " + "and similar processing rates",
        bolt.getName(), boltBackPressure, boltBuffer);
    LOG.info(message);
    slowInstance = new Symptom(SYMPTOM_SLOW_INSTANCE.text(), merged);
  }

  if (slowInstance == null) {
    return null;
  }
  return new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), slowInstance);
}
use of com.microsoft.dhalion.diagnoser.Diagnosis in project incubator-heron by apache.
the class RestartContainerResolver method resolve.
/**
 * Restarts the container hosting an instance that is under back pressure.
 *
 * <p>Diagnoses are scanned in order. The first one that carries a
 * slow-instance symptom with exactly one component, and in which some
 * instance has back pressure above {@code noiseFilterMillis}, triggers a
 * single container restart request and a {@code ContainerRestart} event.
 * Diagnoses with no slow-instance symptom, or with no instance above the
 * noise filter, are skipped.
 *
 * @param diagnosis diagnoses to act on
 * @return a list holding the one {@code ContainerRestart} action performed,
 *     or an empty list when nothing qualified for a restart
 * @throws UnsupportedOperationException if a slow-instance symptom spans
 *     more than one component
 * @throws NumberFormatException if the container segment of the instance
 *     name is not a decimal integer
 */
@Override
public List<Action> resolve(List<Diagnosis> diagnosis) {
  List<Action> actions = new ArrayList<>();
  for (Diagnosis diagnoses : diagnosis) {
    Symptom bpSymptom = diagnoses.getSymptoms().get(SYMPTOM_SLOW_INSTANCE.text());
    if (bpSymptom == null || bpSymptom.getComponents().isEmpty()) {
      // nothing to fix as there is no back pressure
      continue;
    }
    if (bpSymptom.getComponents().size() > 1) {
      throw new UnsupportedOperationException("Multiple components with back pressure symptom");
    }

    // Find which stmgr has back pressure. The container id is taken as the text
    // between the first and second underscore of the instance name
    // (presumably names look like "container_1_bolt_0" -- verify against producers).
    String stmgrId = null;
    for (InstanceMetrics im : bpSymptom.getComponent().getMetrics().values()) {
      if (im.hasMetricAboveLimit(METRIC_BACK_PRESSURE.text(), noiseFilterMillis)) {
        String instanceId = im.getName();
        int fromIndex = instanceId.indexOf('_') + 1;
        int toIndex = instanceId.indexOf('_', fromIndex);
        stmgrId = instanceId.substring(fromIndex, toIndex);
        break;
      }
    }

    if (stmgrId == null) {
      // No instance exceeded the noise filter, so there is no container to restart.
      // Previously this fell through and failed in Integer.valueOf(null).
      LOG.info("No instance with back pressure above noise filter; skipping container restart");
      continue;
    }

    LOG.info("Restarting container: " + stmgrId);
    boolean b = schedulerClient.restartTopology(
        RestartTopologyRequest.newBuilder()
            .setContainerIndex(Integer.parseInt(stmgrId))
            .setTopologyName(topologyName)
            .build());
    LOG.info("Restarted container result: " + b);

    ContainerRestart action = new ContainerRestart();
    LOG.info("Broadcasting container restart event");
    eventManager.onEvent(action);
    actions.add(action);

    // NOTE(review): returns after the first restart, so later diagnoses are not
    // processed in this call -- looks intentional (one restart per cycle); confirm.
    return actions;
  }
  return actions;
}
Aggregations