Search in sources:

Example 1 with Diagnosis

use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.

Class DataSkewDiagnoser, method diagnose.

/**
 * Looks for data skew on the component named by the back-pressure symptom.
 *
 * <p>An empty collection is returned when no back-pressure symptom is present, when the
 * back-pressure component does not also have both a wait-queue-size-skew and a
 * processing-rate-skew symptom, or when no instance meets the thresholds below.
 *
 * <p>An instance is reported when the component-wide maximum wait-queue size is less than
 * twice the instance's mean wait-queue size, and the component-wide maximum execute count
 * is less than 1.10 times the instance's mean execute count. Measurements are taken from
 * the window between the previous checkpoint and the current one.
 *
 * @param symptoms symptoms to evaluate
 * @return a collection holding at most one data-skew diagnosis
 * @throws IllegalStateException if the back-pressure symptoms table has more than one entry
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    Collection<Diagnosis> result = new ArrayList<>();
    SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
    SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }
    if (backPressure.size() == 0) {
        return result;
    }

    String component = backPressure.first().assignments().iterator().next();
    SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // Data skew requires queue-size skew AND processing-rate skew on the very component
    // that is causing back pressure; the rate check runs only if the queue check passed.
    boolean bothSkewsPresent = queueSkew.assignment(component).size() != 0
        && rateSkew.assignment(component).size() != 0;
    if (!bothSkewsPresent) {
        return result;
    }

    Collection<String> skewedInstances = new ArrayList<>();
    Instant windowEnd = context.checkpoint();
    Instant windowStart = context.previousCheckpoint();
    MeasurementsTable componentMetrics =
        context.measurements().between(windowStart, windowEnd).component(component);

    for (String instance : componentMetrics.uniqueInstances()) {
        MeasurementsTable instanceMetrics = componentMetrics.instance(instance);
        double meanQueueSize = instanceMetrics.type(METRIC_WAIT_Q_SIZE.text()).mean();
        double meanExecuteCount = instanceMetrics.type(METRIC_EXE_COUNT.text()).mean();

        boolean queueNearMax =
            componentMetrics.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2;
        if (!queueNearMax) {
            continue;
        }
        boolean rateNearMax =
            componentMetrics.type(METRIC_EXE_COUNT.text()).max() < 1.10 * meanExecuteCount;
        if (!rateNearMax) {
            continue;
        }

        skewedInstances.add(instance);
        LOG.info(String.format(
            "DataSkew: %s back-pressure, high execution count: %s and high buffer size %s",
            instance, meanExecuteCount, meanQueueSize));
    }

    if (!skewedInstances.isEmpty()) {
        result.add(new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), context.checkpoint(), skewedInstances));
    }
    return result;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 2 with Diagnosis

use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.

Class SlowInstanceDiagnoser, method diagnose.

/**
 * Looks for slow instances on the component named by the back-pressure symptom.
 *
 * <p>The diagnoser execution counter is incremented on every call. An empty collection is
 * returned when no back-pressure symptom is present, when the back-pressure component has no
 * wait-queue-size-skew symptom, when it has a processing-rate-skew symptom, or when no
 * instance meets the threshold below.
 *
 * <p>An instance is reported when the component-wide maximum wait-queue size is less than
 * twice the instance's mean wait-queue size. Measurements are taken from the window between
 * the previous checkpoint and the current one.
 *
 * @param symptoms symptoms to evaluate
 * @return a collection holding at most one slow-instance diagnosis
 * @throws IllegalStateException if the back-pressure symptoms table has more than one entry
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    publishingMetrics.executeDiagnoserIncr(SLOW_INSTANCE_DIAGNOSER);

    Collection<Diagnosis> result = new ArrayList<>();
    SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
    SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }
    if (backPressure.size() == 0) {
        return result;
    }

    String component = backPressure.first().assignments().iterator().next();
    SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // A slow instance shows queue-size skew WITHOUT processing-rate skew on the component
    // that is causing back pressure; the rate check runs only if queue skew was found.
    boolean queueSkewOnly = queueSkew.assignment(component).size() != 0
        && !(rateSkew.assignment(component).size() > 0);
    if (!queueSkewOnly) {
        // TODO in a short window rate skew could exist
        return result;
    }

    Collection<String> slowInstances = new ArrayList<>();
    Instant windowEnd = context.checkpoint();
    Instant windowStart = context.previousCheckpoint();
    MeasurementsTable componentMetrics =
        context.measurements().between(windowStart, windowEnd).component(component);

    for (String instance : componentMetrics.uniqueInstances()) {
        double meanQueueSize =
            componentMetrics.instance(instance).type(METRIC_WAIT_Q_SIZE.text()).mean();

        boolean queueNearMax =
            componentMetrics.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2;
        if (!queueNearMax) {
            continue;
        }

        slowInstances.add(instance);
        LOG.info(String.format(
            "SLOW: %s back-pressure and high buffer size: %s and similar processing rates",
            instance, meanQueueSize));
    }

    if (!slowInstances.isEmpty()) {
        Instant diagnosedAt = context.checkpoint();
        result.add(new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), diagnosedAt, slowInstances));
    }
    return result;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 3 with Diagnosis

use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.

Class ScaleUpResolver, method resolve.

/**
 * Scales up the first component named by an under-provisioning diagnosis.
 *
 * <p>The method computes a new parallelism for that component, builds a new packing plan,
 * asks the scheduler to update the topology, and then publishes a {@code TopologyUpdate}
 * event through the event manager.
 *
 * @param diagnosis diagnoses to act on; only under-provisioning entries are considered
 * @return a list holding the single {@code TopologyUpdate} action on success; an empty list
 *     when there is no under-provisioning diagnosis, when the diagnosis has no assignments,
 *     or when no new packing plan could be built. Never {@code null}.
 * @throws RuntimeException if {@code schedulerClient.updateTopology} reports failure
 */
@Override
public Collection<Action> resolve(Collection<Diagnosis> diagnosis) {
    List<Action> actions = new ArrayList<>();
    DiagnosisTable table = DiagnosisTable.of(diagnosis);
    table = table.type(DIAGNOSIS_UNDER_PROVISIONING.text());
    if (table.size() == 0) {
        LOG.fine("No under-provisioning diagnosis present, ending as there's nothing to fix");
        return actions;
    }

    // Scale the first assigned component
    Diagnosis firstDiagnosis = table.first();
    // A diagnosis without assignments names no component to scale.
    if (firstDiagnosis.assignments().isEmpty()) {
        LOG.warning(String.format("Diagnosis %s is missing assignments", firstDiagnosis.id()));
        return actions;
    }
    String component = firstDiagnosis.assignments().iterator().next();

    int newParallelism = computeScaleUpFactor(component);
    Map<String, Integer> changeRequest = new HashMap<>();
    changeRequest.put(component, newParallelism);

    PackingPlan currentPackingPlan = packingPlanProvider.get();
    PackingPlan newPlan = buildNewPackingPlan(changeRequest, currentPackingPlan);
    if (newPlan == null) {
        // Return the empty list rather than null so callers can always iterate the result,
        // consistent with the other "nothing to do" exits above.
        LOG.warning(String.format(
            "No new packing plan was built for component %s, no action taken", component));
        return actions;
    }

    Scheduler.UpdateTopologyRequest updateTopologyRequest =
        Scheduler.UpdateTopologyRequest.newBuilder()
            .setCurrentPackingPlan(getSerializedPlan(currentPackingPlan))
            .setProposedPackingPlan(getSerializedPlan(newPlan))
            .build();
    LOG.info("Sending Updating topology request: " + updateTopologyRequest);
    if (!schedulerClient.updateTopology(updateTopologyRequest)) {
        throw new RuntimeException(String.format("Failed to update topology with Scheduler, "
            + "updateTopologyRequest=%s", updateTopologyRequest));
    }
    LOG.info("Scheduler updated topology successfully.");

    LOG.info("Broadcasting topology update event");
    TopologyUpdate action =
        new TopologyUpdate(context.checkpoint(), Collections.singletonList(component));
    eventManager.onEvent(action);
    actions.add(action);
    return actions;
}
Also used : Action(com.microsoft.dhalion.core.Action) HashMap(java.util.HashMap) Scheduler(org.apache.heron.proto.scheduler.Scheduler) PackingPlan(org.apache.heron.spi.packing.PackingPlan) ArrayList(java.util.ArrayList) DiagnosisTable(com.microsoft.dhalion.core.DiagnosisTable) TopologyUpdate(org.apache.heron.healthmgr.common.HealthManagerEvents.TopologyUpdate) Diagnosis(com.microsoft.dhalion.core.Diagnosis)

Example 4 with Diagnosis

use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.

Class SlowInstanceDiagnoserTest, method failsIfNoBufferSizeDisparity.

/**
 * Supplies only a back-pressure symptom (created with {@code null} assignments) and expects
 * the diagnoser to produce no diagnosis.
 */
@Test
public void failsIfNoBufferSizeDisparity() {
    Symptom backPressureOnly =
        new Symptom(SYMPTOM_COMP_BACK_PRESSURE.text(), Instant.now(), null);

    Collection<Diagnosis> produced =
        diagnoser.diagnose(Collections.singletonList(backPressureOnly));

    assertEquals(0, produced.size());
}
Also used : Diagnosis(com.microsoft.dhalion.core.Diagnosis) Symptom(com.microsoft.dhalion.core.Symptom) Test(org.junit.Test)

Example 5 with Diagnosis

use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.

Class SlowInstanceDiagnoserTest, method diagnosis1of3SlowInstances.

/**
 * Supplies back-pressure and wait-queue-size-skew symptoms for the same component and expects
 * a single slow-instance diagnosis assigned to instance {@code "i1"}.
 */
@Test
public void diagnosis1of3SlowInstances() {
    // NOTE(review): presumably one value per instance, the first belonging to "i1" - confirm
    // against addMeasurements.
    addMeasurements(METRIC_BACK_PRESSURE, 123, 0, 0);
    addMeasurements(METRIC_WAIT_Q_SIZE, 1000, 20, 20);
    when(context.measurements()).thenReturn(MeasurementsTable.of(measurements));

    Collection<String> componentAssignment = Collections.singleton(comp);
    Symptom backPressure =
        new Symptom(SYMPTOM_COMP_BACK_PRESSURE.text(), now, componentAssignment);
    Symptom queueSkew =
        new Symptom(SYMPTOM_WAIT_Q_SIZE_SKEW.text(), now, componentAssignment);

    Collection<Diagnosis> produced = diagnoser.diagnose(Arrays.asList(backPressure, queueSkew));

    assertEquals(1, produced.size());
    Diagnosis slowInstance = produced.iterator().next();
    assertEquals(DIAGNOSIS_SLOW_INSTANCE.text(), slowInstance.type());
    assertEquals(1, slowInstance.assignments().size());
    assertEquals("i1", slowInstance.assignments().iterator().next());
    // TODO
    // assertEquals(1, slowInstance.symptoms().size());
}
Also used : Diagnosis(com.microsoft.dhalion.core.Diagnosis) Symptom(com.microsoft.dhalion.core.Symptom) Test(org.junit.Test)

Aggregations

Diagnosis (com.microsoft.dhalion.core.Diagnosis)16 Test (org.junit.Test)11 Symptom (com.microsoft.dhalion.core.Symptom)10 ArrayList (java.util.ArrayList)4 SymptomsTable (com.microsoft.dhalion.core.SymptomsTable)3 Instant (java.time.Instant)3 Action (com.microsoft.dhalion.core.Action)2 MeasurementsTable (com.microsoft.dhalion.core.MeasurementsTable)2 HashMap (java.util.HashMap)2 PackingPlan (org.apache.heron.spi.packing.PackingPlan)2 DiagnosisTable (com.microsoft.dhalion.core.DiagnosisTable)1 Measurement (com.microsoft.dhalion.core.Measurement)1 ExecutionContext (com.microsoft.dhalion.policy.PoliciesExecutor.ExecutionContext)1 TopologyAPI (org.apache.heron.api.generated.TopologyAPI)1 TopologyUpdate (org.apache.heron.healthmgr.common.HealthManagerEvents.TopologyUpdate)1 PackingPlanProvider (org.apache.heron.healthmgr.common.PackingPlanProvider)1 Scheduler (org.apache.heron.proto.scheduler.Scheduler)1 UpdateTopologyRequest (org.apache.heron.proto.scheduler.Scheduler.UpdateTopologyRequest)1 ISchedulerClient (org.apache.heron.scheduler.client.ISchedulerClient)1 Config (org.apache.heron.spi.common.Config)1