Search in sources :

Example 1 with SymptomsTable

use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

the class DataSkewDiagnoser method diagnose.

/**
 * Produces a data-skew diagnosis for the component that currently reports back pressure.
 *
 * <p>An empty collection is returned when no component back-pressure symptom is present, or when
 * the back-pressure component has no wait-queue-size skew symptom or no processing-rate skew
 * symptom. Otherwise the component's measurements between the previous and the current
 * checkpoint are examined instance by instance. An instance is attached to the diagnosis when
 * the maximum wait-queue size over those measurements is less than twice the instance's mean
 * wait-queue size, and the maximum execution count is less than 1.10 times the instance's mean
 * execution count.
 *
 * @param symptoms the symptoms to diagnose
 * @return a collection holding at most one data-skew {@link Diagnosis}
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    final Collection<Diagnosis> result = new ArrayList<>();
    final SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

    final SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
    if (backPressure.size() == 0) {
        return result;
    }
    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    // Exactly one back-pressure symptom remains; its first assignment is used as the component.
    final String component = backPressure.first().assignments().iterator().next();
    final SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    final SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // Data skew requires both a wait-queue skew and a processing-rate skew symptom
    // assigned to the same component that reports back pressure.
    if (queueSkew.assignment(component).size() == 0) {
        return result;
    }
    if (rateSkew.assignment(component).size() == 0) {
        return result;
    }

    final Instant windowEnd = context.checkpoint();
    final Instant windowStart = context.previousCheckpoint();
    final MeasurementsTable window =
        context.measurements().between(windowStart, windowEnd).component(component);

    final Collection<String> skewedInstances = new ArrayList<>();
    for (String instance : window.uniqueInstances()) {
        final MeasurementsTable perInstance = window.instance(instance);
        final double meanQueueSize = perInstance.type(METRIC_WAIT_Q_SIZE.text()).mean();
        final double meanExecuteCount = perInstance.type(METRIC_EXE_COUNT.text()).mean();

        // The checks are written as negated '<' (not '>=') so NaN values still skip the instance.
        if (!(window.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2)) {
            continue;
        }
        if (!(window.type(METRIC_EXE_COUNT.text()).max() < 1.10 * meanExecuteCount)) {
            continue;
        }

        skewedInstances.add(instance);
        LOG.info(String.format("DataSkew: %s back-pressure, high execution count: %s and " + "high buffer size %s", instance, meanExecuteCount, meanQueueSize));
    }

    if (!skewedInstances.isEmpty()) {
        result.add(new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), context.checkpoint(), skewedInstances));
    }
    return result;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 2 with SymptomsTable

use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

the class SlowInstanceDiagnoser method diagnose.

/**
 * Produces a slow-instance diagnosis for the component that currently reports back pressure.
 *
 * <p>The diagnoser execution counter is incremented on every call. An empty collection is
 * returned when no component back-pressure symptom is present, when the back-pressure component
 * has no wait-queue-size skew symptom, or when it does have a processing-rate skew symptom.
 * Otherwise the component's measurements between the previous and the current checkpoint are
 * examined instance by instance. An instance is attached to the diagnosis when the maximum
 * wait-queue size over those measurements is less than twice the instance's mean wait-queue size.
 *
 * @param symptoms the symptoms to diagnose
 * @return a collection holding at most one slow-instance {@link Diagnosis}
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    publishingMetrics.executeDiagnoserIncr(SLOW_INSTANCE_DIAGNOSER);

    final Collection<Diagnosis> result = new ArrayList<>();
    final SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

    final SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
    if (backPressure.size() == 0) {
        return result;
    }
    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    // Exactly one back-pressure symptom remains; its first assignment is used as the component.
    final String component = backPressure.first().assignments().iterator().next();
    final SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    final SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // A slow instance requires a wait-queue skew symptom on the back-pressure component ...
    if (queueSkew.assignment(component).size() == 0) {
        return result;
    }
    // ... and no processing-rate skew symptom on that same component.
    if (rateSkew.assignment(component).size() > 0) {
        // TODO in a short window rate skew could exist
        return result;
    }

    final Instant windowEnd = context.checkpoint();
    final Instant windowStart = context.previousCheckpoint();
    final MeasurementsTable window =
        context.measurements().between(windowStart, windowEnd).component(component);

    final Collection<String> slowInstances = new ArrayList<>();
    for (String instance : window.uniqueInstances()) {
        final double meanQueueSize = window.instance(instance).type(METRIC_WAIT_Q_SIZE.text()).mean();

        // Written as a negated '<' (not '>=') so a NaN value still skips the instance.
        if (!(window.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2)) {
            continue;
        }

        slowInstances.add(instance);
        LOG.info(String.format("SLOW: %s back-pressure and high buffer size: %s " + "and similar processing rates", instance, meanQueueSize));
    }

    if (!slowInstances.isEmpty()) {
        result.add(new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), context.checkpoint(), slowInstances));
    }
    return result;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 3 with SymptomsTable

use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

the class BackPressureDetectorTest method testConfigAndFilter.

/**
 * Exercises {@link BackPressureDetector} with a mocked noise-filter setting of 50.
 *
 * <p>Scenario 1 feeds back-pressure values 55, 3 and 0 for three instances of "bolt" and expects
 * two symptoms: one component back-pressure symptom and one instance back-pressure symptom,
 * each with a single assignment. Scenario 2 feeds values 45 and 3 and expects no symptoms.
 *
 * @throws IOException declared by the test signature; nothing visible here throws it
 */
@Test
public void testConfigAndFilter() throws IOException {
    HealthPolicyConfig config = mock(HealthPolicyConfig.class);
    // The detector is expected to ask for CONF_NOISE_FILTER with default 20; the mock answers 50.
    when(config.getConfig(CONF_NOISE_FILTER, 20)).thenReturn(50);

    // Scenario 1: only instance i1 (55) reports a value above the mocked setting of 50.
    Measurement measurement1 = new Measurement("bolt", "i1", METRIC_BACK_PRESSURE.text(), now, 55);
    Measurement measurement2 = new Measurement("bolt", "i2", METRIC_BACK_PRESSURE.text(), now, 3);
    Measurement measurement3 = new Measurement("bolt", "i3", METRIC_BACK_PRESSURE.text(), now, 0);
    Collection<Measurement> metrics = new ArrayList<>();
    metrics.add(measurement1);
    metrics.add(measurement2);
    metrics.add(measurement3);

    HealthManagerMetrics publishingMetrics = mock(HealthManagerMetrics.class);
    BackPressureDetector detector = new BackPressureDetector(config, publishingMetrics);
    PoliciesExecutor.ExecutionContext context = mock(PoliciesExecutor.ExecutionContext.class);
    when(context.checkpoint()).thenReturn(now);
    detector.initialize(context);

    Collection<Symptom> symptoms = detector.detect(metrics);
    Assert.assertEquals(2, symptoms.size());

    SymptomsTable compSymptom = SymptomsTable.of(symptoms).type(SYMPTOM_COMP_BACK_PRESSURE.text());
    Assert.assertEquals(1, compSymptom.size());
    Assert.assertEquals(1, compSymptom.get().iterator().next().assignments().size());

    SymptomsTable instanceSymptom = SymptomsTable.of(symptoms).type(SYMPTOM_INSTANCE_BACK_PRESSURE.text());
    Assert.assertEquals(1, instanceSymptom.size());
    Assert.assertEquals(1, instanceSymptom.get().iterator().next().assignments().size());

    // Scenario 2: every value (45 and 3) is below the mocked setting, so nothing is reported.
    measurement1 = new Measurement("bolt", "i1", METRIC_BACK_PRESSURE.text(), now, 45);
    measurement2 = new Measurement("bolt", "i2", METRIC_BACK_PRESSURE.text(), now, 3);
    metrics = new ArrayList<>();
    metrics.add(measurement1);
    metrics.add(measurement2);

    // A fresh detector is built for the second scenario.
    detector = new BackPressureDetector(config, publishingMetrics);
    detector.initialize(context);
    symptoms = detector.detect(metrics);
    Assert.assertEquals(0, symptoms.size());
}
Also used : Measurement(com.microsoft.dhalion.core.Measurement) HealthPolicyConfig(org.apache.heron.healthmgr.HealthPolicyConfig) HealthManagerMetrics(org.apache.heron.healthmgr.HealthManagerMetrics) ArrayList(java.util.ArrayList) PoliciesExecutor(com.microsoft.dhalion.policy.PoliciesExecutor) Symptom(com.microsoft.dhalion.core.Symptom) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable) Test(org.junit.Test)

Example 4 with SymptomsTable

use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

the class ProcessingRateSkewDetectorTest method testConfigAndFilter.

/**
 * Exercises {@link ProcessingRateSkewDetector} with a mocked skew ratio of 2.5.
 *
 * <p>Scenario 1 feeds execution counts 1000 and 200 for two instances of "bolt" and expects
 * three symptoms, including one "POSITIVE" skew symptom assigned to i1 and one "NEGATIVE" skew
 * symptom assigned to i2. Scenario 2 feeds 1000 and 500 and expects no symptoms.
 */
@Test
public void testConfigAndFilter() {
    final HealthPolicyConfig policyConfig = mock(HealthPolicyConfig.class);
    // The detector is expected to ask for CONF_SKEW_RATIO with default 1.5; the mock answers 2.5.
    when(policyConfig.getConfig(CONF_SKEW_RATIO, 1.5)).thenReturn(2.5);

    final PoliciesExecutor.ExecutionContext executionContext = mock(PoliciesExecutor.ExecutionContext.class);
    when(executionContext.checkpoint()).thenReturn(Instant.now());

    // All measurements share the same timestamp.
    final Instant sampleTime = Instant.ofEpochSecond(1497892222);
    final String positiveSkew = "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;
    final String negativeSkew = "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;

    // Scenario 1: the two counts differ by a factor of 5, which is above the mocked ratio.
    final Collection<Measurement> skewedMetrics = new ArrayList<>();
    skewedMetrics.add(new Measurement("bolt", "i1", METRIC_EXE_COUNT.text(), sampleTime, 1000));
    skewedMetrics.add(new Measurement("bolt", "i2", METRIC_EXE_COUNT.text(), sampleTime, 200.0));

    final ProcessingRateSkewDetector skewDetector = new ProcessingRateSkewDetector(policyConfig);
    skewDetector.initialize(executionContext);
    final Collection<Symptom> skewSymptoms = skewDetector.detect(skewedMetrics);
    assertEquals(3, skewSymptoms.size());

    final SymptomsTable skewTable = SymptomsTable.of(skewSymptoms);
    assertEquals(1, skewTable.type(positiveSkew).size());
    assertEquals(1, skewTable.type(negativeSkew).size());
    assertEquals(1, skewTable.type(positiveSkew).assignment("i1").size());
    assertEquals(1, skewTable.type(negativeSkew).assignment("i2").size());

    // Scenario 2: the two counts differ by a factor of 2, which is below the mocked ratio.
    final Collection<Measurement> balancedMetrics = new ArrayList<>();
    balancedMetrics.add(new Measurement("bolt", "i1", METRIC_EXE_COUNT.text(), sampleTime, 1000));
    balancedMetrics.add(new Measurement("bolt", "i2", METRIC_EXE_COUNT.text(), sampleTime, 500.0));

    // A fresh detector is built for the second scenario.
    final ProcessingRateSkewDetector balancedDetector = new ProcessingRateSkewDetector(policyConfig);
    balancedDetector.initialize(executionContext);
    final Collection<Symptom> balancedSymptoms = balancedDetector.detect(balancedMetrics);
    assertEquals(0, balancedSymptoms.size());
}
Also used : Measurement(com.microsoft.dhalion.core.Measurement) HealthPolicyConfig(org.apache.heron.healthmgr.HealthPolicyConfig) ArrayList(java.util.ArrayList) PoliciesExecutor(com.microsoft.dhalion.policy.PoliciesExecutor) Symptom(com.microsoft.dhalion.core.Symptom) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable) Test(org.junit.Test)

Example 5 with SymptomsTable

use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

the class WaitQueueSkewDetectorTest method testConfigAndFilter.

/**
 * Exercises {@link WaitQueueSkewDetector} with a mocked skew ratio of 15.0.
 *
 * <p>Scenario 1 feeds wait-queue sizes 1501 and 100 for two instances of "bolt" and expects
 * three symptoms, including one "POSITIVE" skew symptom assigned to i1 and one "NEGATIVE" skew
 * symptom assigned to i2. Scenario 2 feeds 1500 and 110 and expects no symptoms.
 */
@Test
public void testConfigAndFilter() {
    final HealthPolicyConfig policyConfig = mock(HealthPolicyConfig.class);
    // The detector is expected to ask for CONF_SKEW_RATIO with default 20.0; the mock answers 15.0.
    when(policyConfig.getConfig(CONF_SKEW_RATIO, 20.0)).thenReturn(15.0);

    final PoliciesExecutor.ExecutionContext executionContext = mock(PoliciesExecutor.ExecutionContext.class);
    when(executionContext.checkpoint()).thenReturn(Instant.now());

    // All measurements share the same timestamp.
    final Instant sampleTime = Instant.ofEpochSecond(1497892222);
    final String positiveSkew = "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_WAIT_Q_SIZE_SKEW;
    final String negativeSkew = "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_WAIT_Q_SIZE_SKEW;

    // Scenario 1: 1501 is just over 15 times 100, i.e. above the mocked ratio.
    final Collection<Measurement> skewedMetrics = new ArrayList<>();
    skewedMetrics.add(new Measurement("bolt", "i1", METRIC_WAIT_Q_SIZE.text(), sampleTime, 1501));
    skewedMetrics.add(new Measurement("bolt", "i2", METRIC_WAIT_Q_SIZE.text(), sampleTime, 100.0));

    final WaitQueueSkewDetector skewDetector = new WaitQueueSkewDetector(policyConfig);
    skewDetector.initialize(executionContext);
    final Collection<Symptom> skewSymptoms = skewDetector.detect(skewedMetrics);
    assertEquals(3, skewSymptoms.size());

    final SymptomsTable skewTable = SymptomsTable.of(skewSymptoms);
    assertEquals(1, skewTable.type(positiveSkew).size());
    assertEquals(1, skewTable.type(negativeSkew).size());
    assertEquals(1, skewTable.type(positiveSkew).assignment("i1").size());
    assertEquals(1, skewTable.type(negativeSkew).assignment("i2").size());

    // Scenario 2: 1500 is less than 15 times 110, i.e. below the mocked ratio.
    final Collection<Measurement> balancedMetrics = new ArrayList<>();
    balancedMetrics.add(new Measurement("bolt", "i1", METRIC_WAIT_Q_SIZE.text(), sampleTime, 1500));
    balancedMetrics.add(new Measurement("bolt", "i2", METRIC_WAIT_Q_SIZE.text(), sampleTime, 110.0));

    // A fresh detector is built for the second scenario.
    final WaitQueueSkewDetector balancedDetector = new WaitQueueSkewDetector(policyConfig);
    balancedDetector.initialize(executionContext);
    final Collection<Symptom> balancedSymptoms = balancedDetector.detect(balancedMetrics);
    assertEquals(0, balancedSymptoms.size());
}
Also used : Measurement(com.microsoft.dhalion.core.Measurement) HealthPolicyConfig(org.apache.heron.healthmgr.HealthPolicyConfig) ArrayList(java.util.ArrayList) PoliciesExecutor(com.microsoft.dhalion.policy.PoliciesExecutor) Symptom(com.microsoft.dhalion.core.Symptom) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable) Test(org.junit.Test)

Aggregations

SymptomsTable (com.microsoft.dhalion.core.SymptomsTable): 8, ArrayList (java.util.ArrayList): 7, Measurement (com.microsoft.dhalion.core.Measurement): 4, Symptom (com.microsoft.dhalion.core.Symptom): 4, PoliciesExecutor (com.microsoft.dhalion.policy.PoliciesExecutor): 4, HealthPolicyConfig (org.apache.heron.healthmgr.HealthPolicyConfig): 4, Test (org.junit.Test): 4, Diagnosis (com.microsoft.dhalion.core.Diagnosis): 3, Instant (java.time.Instant): 3, MeasurementsTable (com.microsoft.dhalion.core.MeasurementsTable): 2, HashSet (java.util.HashSet): 1, HealthManagerMetrics (org.apache.heron.healthmgr.HealthManagerMetrics): 1, ContainerRestart (org.apache.heron.healthmgr.common.HealthManagerEvents.ContainerRestart): 1