use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class DataSkewDiagnoser method diagnose.
/**
 * Produces a data-skew diagnosis for the component that is reporting back pressure.
 *
 * <p>The diagnosis is only attempted when exactly one component back-pressure symptom is present
 * and both a wait-queue-size skew symptom and a processing-rate skew symptom are assigned to the
 * same component. Measurements of that component taken between the previous and the current
 * checkpoint are then inspected per instance: an instance is reported when the component-wide
 * maximum wait queue size is below twice the instance's mean wait queue size and the
 * component-wide maximum execute count is below 1.10 times the instance's mean execute count.
 *
 * @param symptoms the symptoms to inspect
 * @return a collection with at most one data-skew diagnosis; empty when the preconditions above
 *     are not met or no instance qualifies
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
  Collection<Diagnosis> result = new ArrayList<>();
  SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

  SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
  if (backPressure.size() > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  }
  if (backPressure.size() == 0) {
    return result;
  }

  // The first assignment of the single back-pressure symptom names the affected component.
  String component = backPressure.first().assignments().iterator().next();

  SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
  SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

  // Data skew requires a queue-size skew and a processing-rate skew on the same component.
  boolean skewConfirmed = queueSkew.assignment(component).size() != 0
      && rateSkew.assignment(component).size() != 0;
  if (!skewConfirmed) {
    return result;
  }

  Collection<String> skewedInstances = new ArrayList<>();
  Instant windowEnd = context.checkpoint();
  Instant windowStart = context.previousCheckpoint();
  MeasurementsTable componentMeasurements =
      context.measurements().between(windowStart, windowEnd).component(component);

  for (String instanceName : componentMeasurements.uniqueInstances()) {
    MeasurementsTable perInstance = componentMeasurements.instance(instanceName);
    double meanQueueSize = perInstance.type(METRIC_WAIT_Q_SIZE.text()).mean();
    double meanExecuteCount = perInstance.type(METRIC_EXE_COUNT.text()).mean();

    boolean queueCloseToMax =
        componentMeasurements.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2;
    if (!queueCloseToMax) {
      continue;
    }

    boolean rateCloseToMax =
        componentMeasurements.type(METRIC_EXE_COUNT.text()).max() < 1.10 * meanExecuteCount;
    if (!rateCloseToMax) {
      continue;
    }

    skewedInstances.add(instanceName);
    LOG.info(String.format(
        "DataSkew: %s back-pressure, high execution count: %s and " + "high buffer size %s",
        instanceName, meanExecuteCount, meanQueueSize));
  }

  if (!skewedInstances.isEmpty()) {
    result.add(new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), context.checkpoint(), skewedInstances));
  }
  return result;
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class SlowInstanceDiagnoser method diagnose.
/**
 * Produces a slow-instance diagnosis for the component that is reporting back pressure.
 *
 * <p>The diagnoser first records its execution on {@code publishingMetrics}. A diagnosis is only
 * attempted when exactly one component back-pressure symptom is present, a wait-queue-size skew
 * symptom is assigned to that component, and no processing-rate skew symptom is assigned to it.
 * Measurements of the component taken between the previous and the current checkpoint are then
 * inspected per instance: an instance is reported when the component-wide maximum wait queue
 * size is below twice the instance's mean wait queue size.
 *
 * @param symptoms the symptoms to inspect
 * @return a collection with at most one slow-instance diagnosis; empty when the preconditions
 *     above are not met or no instance qualifies
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
  publishingMetrics.executeDiagnoserIncr(SLOW_INSTANCE_DIAGNOSER);

  Collection<Diagnosis> result = new ArrayList<>();
  SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

  SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
  if (backPressure.size() > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  }
  if (backPressure.size() == 0) {
    return result;
  }

  // The first assignment of the single back-pressure symptom names the affected component.
  String component = backPressure.first().assignments().iterator().next();

  SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
  SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

  // A slow instance shows a wait queue skew WITHOUT a processing rate skew on the same component.
  boolean queueSkewWithoutRateSkew = queueSkew.assignment(component).size() != 0
      && !(rateSkew.assignment(component).size() > 0);
  if (!queueSkewWithoutRateSkew) {
    // TODO in a short window rate skew could exist
    return result;
  }

  Collection<String> slowInstances = new ArrayList<>();
  Instant windowEnd = context.checkpoint();
  Instant windowStart = context.previousCheckpoint();
  MeasurementsTable componentMeasurements =
      context.measurements().between(windowStart, windowEnd).component(component);

  for (String instanceName : componentMeasurements.uniqueInstances()) {
    MeasurementsTable perInstance = componentMeasurements.instance(instanceName);
    double meanQueueSize = perInstance.type(METRIC_WAIT_Q_SIZE.text()).mean();

    boolean queueCloseToMax =
        componentMeasurements.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2;
    if (!queueCloseToMax) {
      continue;
    }

    slowInstances.add(instanceName);
    LOG.info(String.format(
        "SLOW: %s back-pressure and high buffer size: %s " + "and similar processing rates",
        instanceName, meanQueueSize));
  }

  if (!slowInstances.isEmpty()) {
    Instant diagnosisTime = context.checkpoint();
    result.add(new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), diagnosisTime, slowInstances));
  }
  return result;
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class BackPressureDetectorTest method testConfigAndFilter.
/**
 * Checks that {@code BackPressureDetector} reads its noise filter from the policy config and
 * applies it when detecting symptoms.
 *
 * <p>The config mock returns 50 for {@code CONF_NOISE_FILTER}. In the first scenario the
 * back-pressure measurements for instances i1, i2 and i3 are 55, 3 and 0; the detector is
 * expected to emit two symptoms, one component back-pressure symptom and one instance
 * back-pressure symptom, each carrying a single assignment. In the second scenario the
 * measurements are 45 and 3 and no symptom is expected (presumably because both values fall
 * below the configured filter).
 */
@Test
public void testConfigAndFilter() throws IOException {
  HealthPolicyConfig config = mock(HealthPolicyConfig.class);
  when(config.getConfig(CONF_NOISE_FILTER, 20)).thenReturn(50);

  // Scenario 1: only i1 reports a value above the mocked filter value of 50.
  Measurement measurement1 = new Measurement("bolt", "i1", METRIC_BACK_PRESSURE.text(), now, 55);
  Measurement measurement2 = new Measurement("bolt", "i2", METRIC_BACK_PRESSURE.text(), now, 3);
  Measurement measurement3 = new Measurement("bolt", "i3", METRIC_BACK_PRESSURE.text(), now, 0);
  Collection<Measurement> metrics = new ArrayList<>();
  metrics.add(measurement1);
  metrics.add(measurement2);
  metrics.add(measurement3);

  HealthManagerMetrics publishingMetrics = mock(HealthManagerMetrics.class);
  BackPressureDetector detector = new BackPressureDetector(config, publishingMetrics);
  PoliciesExecutor.ExecutionContext context = mock(PoliciesExecutor.ExecutionContext.class);
  when(context.checkpoint()).thenReturn(now);
  detector.initialize(context);

  Collection<Symptom> symptoms = detector.detect(metrics);
  Assert.assertEquals(2, symptoms.size());

  // Build the symptoms table once and filter it per symptom type.
  SymptomsTable symptomsTable = SymptomsTable.of(symptoms);

  SymptomsTable compSymptom = symptomsTable.type(SYMPTOM_COMP_BACK_PRESSURE.text());
  Assert.assertEquals(1, compSymptom.size());
  Assert.assertEquals(1, compSymptom.get().iterator().next().assignments().size());

  SymptomsTable instanceSymptom = symptomsTable.type(SYMPTOM_INSTANCE_BACK_PRESSURE.text());
  Assert.assertEquals(1, instanceSymptom.size());
  Assert.assertEquals(1, instanceSymptom.get().iterator().next().assignments().size());

  // Scenario 2: every reported value is below the mocked filter value, so nothing is detected.
  measurement1 = new Measurement("bolt", "i1", METRIC_BACK_PRESSURE.text(), now, 45);
  measurement2 = new Measurement("bolt", "i2", METRIC_BACK_PRESSURE.text(), now, 3);
  metrics = new ArrayList<>();
  metrics.add(measurement1);
  metrics.add(measurement2);

  detector = new BackPressureDetector(config, publishingMetrics);
  detector.initialize(context);
  symptoms = detector.detect(metrics);
  Assert.assertEquals(0, symptoms.size());
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class ProcessingRateSkewDetectorTest method testConfigAndFilter.
/**
 * Checks that {@code ProcessingRateSkewDetector} reads its skew ratio from the policy config and
 * applies it when detecting symptoms.
 *
 * <p>The config mock returns 2.5 for {@code CONF_SKEW_RATIO}. With execute counts of 1000 (i1)
 * and 200 (i2) three symptoms are expected, among them one "POSITIVE" skew symptom assigned to
 * i1 and one "NEGATIVE" skew symptom assigned to i2. With execute counts of 1000 and 500 no
 * symptom is expected.
 */
@Test
public void testConfigAndFilter() {
  final Instant sampleTime = Instant.ofEpochSecond(1497892222);
  final String positiveSkew =
      "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;
  final String negativeSkew =
      "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;

  HealthPolicyConfig policyConfig = mock(HealthPolicyConfig.class);
  when(policyConfig.getConfig(CONF_SKEW_RATIO, 1.5)).thenReturn(2.5);

  // Scenario 1: i1 processes five times as much as i2, which exceeds the mocked ratio of 2.5.
  Collection<Measurement> samples = new ArrayList<>();
  samples.add(new Measurement("bolt", "i1", METRIC_EXE_COUNT.text(), sampleTime, 1000));
  samples.add(new Measurement("bolt", "i2", METRIC_EXE_COUNT.text(), sampleTime, 200.0));

  ProcessingRateSkewDetector skewDetector = new ProcessingRateSkewDetector(policyConfig);
  PoliciesExecutor.ExecutionContext executionContext =
      mock(PoliciesExecutor.ExecutionContext.class);
  when(executionContext.checkpoint()).thenReturn(Instant.now());
  skewDetector.initialize(executionContext);

  Collection<Symptom> detected = skewDetector.detect(samples);
  assertEquals(3, detected.size());

  SymptomsTable table = SymptomsTable.of(detected);
  SymptomsTable positives = table.type(positiveSkew);
  assertEquals(1, positives.size());
  SymptomsTable negatives = table.type(negativeSkew);
  assertEquals(1, negatives.size());
  assertEquals(1, positives.assignment("i1").size());
  assertEquals(1, negatives.assignment("i2").size());

  // Scenario 2: i1 processes only twice as much as i2, which stays within the mocked ratio.
  samples = new ArrayList<>();
  samples.add(new Measurement("bolt", "i1", METRIC_EXE_COUNT.text(), sampleTime, 1000));
  samples.add(new Measurement("bolt", "i2", METRIC_EXE_COUNT.text(), sampleTime, 500.0));

  skewDetector = new ProcessingRateSkewDetector(policyConfig);
  skewDetector.initialize(executionContext);
  detected = skewDetector.detect(samples);
  assertEquals(0, detected.size());
}
use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.
the class WaitQueueSkewDetectorTest method testConfigAndFilter.
/**
 * Checks that {@code WaitQueueSkewDetector} reads its skew ratio from the policy config and
 * applies it when detecting symptoms.
 *
 * <p>The config mock returns 15.0 for {@code CONF_SKEW_RATIO}. With wait queue sizes of 1501
 * (i1) and 100 (i2) three symptoms are expected, among them one "POSITIVE" skew symptom assigned
 * to i1 and one "NEGATIVE" skew symptom assigned to i2. With wait queue sizes of 1500 and 110 no
 * symptom is expected.
 */
@Test
public void testConfigAndFilter() {
  final Instant sampleTime = Instant.ofEpochSecond(1497892222);
  final String positiveSkew = "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_WAIT_Q_SIZE_SKEW;
  final String negativeSkew = "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_WAIT_Q_SIZE_SKEW;

  HealthPolicyConfig policyConfig = mock(HealthPolicyConfig.class);
  when(policyConfig.getConfig(CONF_SKEW_RATIO, 20.0)).thenReturn(15.0);

  // Scenario 1: the queue of i1 is just over 15 times the queue of i2.
  Collection<Measurement> samples = new ArrayList<>();
  samples.add(new Measurement("bolt", "i1", METRIC_WAIT_Q_SIZE.text(), sampleTime, 1501));
  samples.add(new Measurement("bolt", "i2", METRIC_WAIT_Q_SIZE.text(), sampleTime, 100.0));

  WaitQueueSkewDetector skewDetector = new WaitQueueSkewDetector(policyConfig);
  PoliciesExecutor.ExecutionContext executionContext =
      mock(PoliciesExecutor.ExecutionContext.class);
  when(executionContext.checkpoint()).thenReturn(Instant.now());
  skewDetector.initialize(executionContext);

  Collection<Symptom> detected = skewDetector.detect(samples);
  assertEquals(3, detected.size());

  SymptomsTable table = SymptomsTable.of(detected);
  SymptomsTable positives = table.type(positiveSkew);
  assertEquals(1, positives.size());
  SymptomsTable negatives = table.type(negativeSkew);
  assertEquals(1, negatives.size());
  assertEquals(1, positives.assignment("i1").size());
  assertEquals(1, negatives.assignment("i2").size());

  // Scenario 2: the queue of i1 is less than 15 times the queue of i2.
  samples = new ArrayList<>();
  samples.add(new Measurement("bolt", "i1", METRIC_WAIT_Q_SIZE.text(), sampleTime, 1500));
  samples.add(new Measurement("bolt", "i2", METRIC_WAIT_Q_SIZE.text(), sampleTime, 110.0));

  skewDetector = new WaitQueueSkewDetector(policyConfig);
  skewDetector.initialize(executionContext);
  detected = skewDetector.detect(samples);
  assertEquals(0, detected.size());
}
Aggregations