Search in sources:

Example 6 with SymptomsTable

Use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

In class RestartContainerResolver, method resolve.

/**
 * Restarts the containers that host instances reporting back-pressure.
 *
 * <p>Looks up the instance back-pressure symptoms recorded between the previous and the
 * current checkpoint, derives a container (stream manager) id from each affected instance
 * id, asks the scheduler to restart each of those containers, and finally broadcasts a
 * {@code ContainerRestart} event for the set of containers.
 *
 * <p>The container id is taken as the text between the first and second {@code '_'} of the
 * instance id. Instance ids that do not contain two underscores are logged and skipped
 * rather than aborting the whole resolution cycle.
 *
 * @param diagnosis diagnoses produced in this cycle; not read by this resolver
 * @return an empty collection when no back-pressure symptoms were found in the window,
 *         otherwise a single-element collection holding the broadcast restart action
 * @throws NumberFormatException if an extracted container id is not a valid integer
 */
@Override
public Collection<Action> resolve(Collection<Diagnosis> diagnosis) {
    publishingMetrics.executeResolver(RESTART_CONTAINER_RESOLVER);
    // find all back pressure measurements reported in this execution cycle
    Instant current = context.checkpoint();
    Instant previous = context.previousCheckpoint();
    SymptomsTable bpSymptoms = context.symptoms().type(SYMPTOM_INSTANCE_BACK_PRESSURE.text()).between(previous, current);
    if (bpSymptoms.size() == 0) {
        LOG.fine("No back-pressure measurements found, ending as there's nothing to fix");
        // Type-safe empty collection (the raw EMPTY_LIST constant needs an unchecked conversion).
        return Collections.emptyList();
    }
    Collection<String> allBpInstances = new HashSet<>();
    bpSymptoms.get().forEach(symptom -> allBpInstances.addAll(symptom.assignments()));
    LOG.info(String.format("%d instances caused back-pressure", allBpInstances.size()));
    // A set, so each container is restarted once even when several of its instances are affected.
    Collection<String> stmgrIds = new HashSet<>();
    for (String instanceId : allBpInstances) {
        LOG.info("Id of instance causing back-pressure: " + instanceId);
        int fromIndex = instanceId.indexOf('_') + 1;
        int toIndex = instanceId.indexOf('_', fromIndex);
        if (toIndex < 0) {
            // Fewer than two underscores: substring(fromIndex, -1) would throw, so skip this id.
            LOG.warning("Cannot extract container id from instance id: " + instanceId);
            continue;
        }
        stmgrIds.add(instanceId.substring(fromIndex, toIndex));
    }
    for (String stmgrId : stmgrIds) {
        LOG.info("Restarting container: " + stmgrId);
        boolean b = schedulerClient.restartTopology(RestartTopologyRequest.newBuilder().setContainerIndex(Integer.parseInt(stmgrId)).setTopologyName(topologyName).build());
        LOG.info("Restarted container result: " + b);
        publishingMetrics.executeIncr("RestartContainer");
    }
    LOG.info("Broadcasting container restart event");
    ContainerRestart action = new ContainerRestart(current, stmgrIds);
    eventManager.onEvent(action);
    return Collections.singletonList(action);
}
Also used : Instant(java.time.Instant) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable) ContainerRestart(org.apache.heron.healthmgr.common.HealthManagerEvents.ContainerRestart) HashSet(java.util.HashSet)

Example 7 with SymptomsTable

Use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

In class ProcessingRateSkewDetectorTest, method testReturnsMultipleComponents.

/**
 * Feeds the detector execute-count measurements for three components (two instances each)
 * with the skew ratio stubbed to 2.5, and checks the symptoms it emits.
 *
 * <p>Expected: six symptoms in total; two POSITIVE skew symptoms (assigned to i1 and i3) and
 * two NEGATIVE skew symptoms (assigned to i2 and i4).
 * NOTE(review): bolt3 (1000 vs 500) presumably yields no POSITIVE/NEGATIVE symptom because
 * its ratio is below the stubbed threshold - confirm against the detector.
 */
@Test
public void testReturnsMultipleComponents() {
    HealthPolicyConfig config = mock(HealthPolicyConfig.class);
    when(config.getConfig(CONF_SKEW_RATIO, 1.5)).thenReturn(2.5);

    // All samples share one metric name and one timestamp.
    final String exeCount = METRIC_EXE_COUNT.text();
    final Instant sampleTime = Instant.ofEpochSecond(1497892222);

    Collection<Measurement> metrics = new ArrayList<>();
    metrics.add(new Measurement("bolt", "i1", exeCount, sampleTime, 1000));
    metrics.add(new Measurement("bolt", "i2", exeCount, sampleTime, 200.0));
    metrics.add(new Measurement("bolt2", "i3", exeCount, sampleTime, 1000));
    metrics.add(new Measurement("bolt2", "i4", exeCount, sampleTime, 200.0));
    metrics.add(new Measurement("bolt3", "i5", exeCount, sampleTime, 1000));
    metrics.add(new Measurement("bolt3", "i6", exeCount, sampleTime, 500.0));

    ProcessingRateSkewDetector detector = new ProcessingRateSkewDetector(config);
    PoliciesExecutor.ExecutionContext context = mock(PoliciesExecutor.ExecutionContext.class);
    when(context.checkpoint()).thenReturn(Instant.now());
    detector.initialize(context);

    Collection<Symptom> symptoms = detector.detect(metrics);
    assertEquals(6, symptoms.size());

    // Symptom type names as built by string concatenation with the enum constant.
    final String positiveSkew = "POSITIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;
    final String negativeSkew = "NEGATIVE " + BaseDetector.SymptomType.SYMPTOM_PROCESSING_RATE_SKEW;

    SymptomsTable symptomsTable = SymptomsTable.of(symptoms);
    assertEquals(2, symptomsTable.type(positiveSkew).size());
    assertEquals(2, symptomsTable.type(negativeSkew).size());
    assertEquals(1, symptomsTable.type(positiveSkew).assignment("i1").size());
    assertEquals(1, symptomsTable.type(positiveSkew).assignment("i3").size());
    assertEquals(1, symptomsTable.type(negativeSkew).assignment("i2").size());
    assertEquals(1, symptomsTable.type(negativeSkew).assignment("i4").size());
}
Also used : Measurement(com.microsoft.dhalion.core.Measurement) HealthPolicyConfig(org.apache.heron.healthmgr.HealthPolicyConfig) ArrayList(java.util.ArrayList) PoliciesExecutor(com.microsoft.dhalion.policy.PoliciesExecutor) Symptom(com.microsoft.dhalion.core.Symptom) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable) Test(org.junit.Test)

Example 8 with SymptomsTable

Use of com.microsoft.dhalion.core.SymptomsTable in project heron by twitter.

In class UnderProvisioningDiagnoser, method diagnose.

/**
 * Produces an under-provisioning diagnosis for a component that shows back-pressure but
 * neither a wait-queue-size skew symptom nor a processing-rate skew symptom.
 *
 * @param symptoms symptoms to inspect
 * @return an empty collection when there is no component back-pressure symptom, or when the
 *         back-pressure component also has a wait-queue or processing-rate skew symptom;
 *         otherwise a collection holding one under-provisioning diagnosis for that component
 * @throws IllegalStateException if more than one component back-pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    Collection<Diagnosis> result = new ArrayList<>();
    SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
    SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

    int backPressureCount = backPressure.size();
    if (backPressureCount == 0) {
        return result;
    }
    if (backPressureCount > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    // Exactly one back-pressure symptom: use its first assignment as the component name.
    String component = backPressure.first().assignments().iterator().next();
    SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // Any skew on the component rules out this diagnosis; the wait-queue check runs first.
    boolean hasQueueSkew = queueSkew.assignment(component).size() != 0;
    boolean skewed = hasQueueSkew || rateSkew.assignment(component).size() != 0;
    if (skewed) {
        return result;
    }

    LOG.info(String.format("UNDER_PROVISIONING: %s back-pressure and similar processing rates and wait queue sizes", component));
    Collection<String> assignments = Collections.singletonList(component);
    result.add(new Diagnosis(DIAGNOSIS_UNDER_PROVISIONING.text(), context.checkpoint(), assignments));
    return result;
}
Also used : ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Aggregations

SymptomsTable (com.microsoft.dhalion.core.SymptomsTable)8 ArrayList (java.util.ArrayList)7 Measurement (com.microsoft.dhalion.core.Measurement)4 Symptom (com.microsoft.dhalion.core.Symptom)4 PoliciesExecutor (com.microsoft.dhalion.policy.PoliciesExecutor)4 HealthPolicyConfig (org.apache.heron.healthmgr.HealthPolicyConfig)4 Test (org.junit.Test)4 Diagnosis (com.microsoft.dhalion.core.Diagnosis)3 Instant (java.time.Instant)3 MeasurementsTable (com.microsoft.dhalion.core.MeasurementsTable)2 HashSet (java.util.HashSet)1 HealthManagerMetrics (org.apache.heron.healthmgr.HealthManagerMetrics)1 ContainerRestart (org.apache.heron.healthmgr.common.HealthManagerEvents.ContainerRestart)1