Search in sources :

Example 1 with MeasurementsTable

use of com.microsoft.dhalion.core.MeasurementsTable in project heron by twitter.

the class GrowingWaitQueueDetector method detect.

/**
 * Flags every component whose wait queue size trend is above the configured rate limit, i.e.
 * components whose pending buffer keeps growing.
 *
 * @param measurements measurements to inspect; only those of the wait queue size metric type
 *                     are taken into account
 * @return one symptom per component whose computed wait queue trend exceeds {@code rateLimit};
 * empty when no component qualifies
 */
@Override
public Collection<Symptom> detect(Collection<Measurement> measurements) {
    MeasurementsTable queueSizes =
        MeasurementsTable.of(measurements).type(METRIC_WAIT_Q_SIZE.text());
    Collection<Symptom> symptoms = new ArrayList<>();

    for (String name : queueSizes.uniqueComponents()) {
        MeasurementsTable perComponent = queueSizes.component(name);
        double trend = computeWaitQueueSizeTrend(perComponent);

        // Negated form (rather than "<=") keeps a NaN trend on the "skip" path.
        if (!(trend > rateLimit)) {
            continue;
        }

        LOG.info(String.format("Detected growing wait queues for %s, max rate %f", name, trend));
        // The symptom is assigned to the component itself, not to individual instances.
        Collection<String> assigned = Collections.singletonList(name);
        symptoms.add(new Symptom(SYMPTOM_GROWING_WAIT_Q.text(), context.checkpoint(), assigned));
    }

    return symptoms;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) ArrayList(java.util.ArrayList) Symptom(com.microsoft.dhalion.core.Symptom)

Example 2 with MeasurementsTable

use of com.microsoft.dhalion.core.MeasurementsTable in project heron by twitter.

the class DataSkewDiagnoser method diagnose.

/**
 * Produces a data skew diagnosis for the component reported by the single back pressure
 * symptom, provided that component also carries both a wait queue size skew symptom and a
 * processing rate skew symptom.
 *
 * @param symptoms symptoms produced by the detectors
 * @return a collection holding at most one data skew diagnosis; empty when the preconditions
 * are not met or no instance qualifies
 * @throws IllegalStateException if more than one back pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    Collection<Diagnosis> diagnoses = new ArrayList<>();
    SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
    SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

    if (backPressure.size() == 0) {
        return diagnoses;
    }
    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    String target = backPressure.first().assignments().iterator().next();
    SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // Both a wait queue skew and a processing rate skew must be reported for the same
    // component that is under back pressure; otherwise this is not a data skew case.
    if (queueSkew.assignment(target).size() == 0) {
        return diagnoses;
    }
    if (rateSkew.assignment(target).size() == 0) {
        return diagnoses;
    }

    Instant windowEnd = context.checkpoint();
    Instant windowStart = context.previousCheckpoint();
    MeasurementsTable componentMetrics =
        context.measurements().between(windowStart, windowEnd).component(target);

    Collection<String> skewed = new ArrayList<>();
    for (String id : componentMetrics.uniqueInstances()) {
        MeasurementsTable own = componentMetrics.instance(id);
        double meanQueue = own.type(METRIC_WAIT_Q_SIZE.text()).mean();
        double meanRate = own.type(METRIC_EXE_COUNT.text()).mean();

        // The instance's mean queue size must be more than half of the component-wide maximum.
        boolean queueCloseToMax =
            componentMetrics.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueue * 2;
        if (!queueCloseToMax) {
            continue;
        }

        // The component-wide maximum execution count must stay below 1.10 times the
        // instance's mean execution count.
        boolean rateCloseToMax =
            componentMetrics.type(METRIC_EXE_COUNT.text()).max() < 1.10 * meanRate;
        if (!rateCloseToMax) {
            continue;
        }

        skewed.add(id);
        LOG.info(String.format("DataSkew: %s back-pressure, high execution count: %s and "
            + "high buffer size %s", id, meanRate, meanQueue));
    }

    if (!skewed.isEmpty()) {
        diagnoses.add(new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), context.checkpoint(), skewed));
    }
    return diagnoses;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 3 with MeasurementsTable

use of com.microsoft.dhalion.core.MeasurementsTable in project heron by twitter.

the class LargeWaitQueueDetector method detect.

/**
 * Detects all components having a large pending buffer or wait queue.
 *
 * <p>For every component present in the wait queue size measurements, each instance's mean
 * wait queue size is compared against {@code sizeLimit}; instances above the limit are
 * collected and reported together in a single symptom for that component.
 *
 * @param measurements measurements to inspect; only those of the wait queue size metric type
 *                     are taken into account
 * @return a collection of symptoms, one per component that has at least one instance with a
 * large wait queue; empty when no instance exceeds the limit
 */
@Override
public Collection<Symptom> detect(Collection<Measurement> measurements) {
    Collection<Symptom> result = new ArrayList<>();
    MeasurementsTable waitQueueMetrics = MeasurementsTable.of(measurements).type(METRIC_WAIT_Q_SIZE.text());
    for (String component : waitQueueMetrics.uniqueComponents()) {
        Set<String> addresses = new HashSet<>();
        MeasurementsTable instanceMetrics = waitQueueMetrics.component(component);
        for (String instance : instanceMetrics.uniqueInstances()) {
            double avgWaitQSize = instanceMetrics.instance(instance).mean();
            if (avgWaitQSize > sizeLimit) {
                // The logged value is the instance's mean queue size, so the message says so;
                // the previous text lacked a space before the instance name, contained a
                // stray '+', and mislabelled the value as the "smallest queue".
                LOG.info(String.format("Detected large wait queues for instance %s, "
                    + "average queue size is %f", instance, avgWaitQSize));
                addresses.add(instance);
            }
        }
        // Only emit a symptom when at least one instance of the component crossed the limit.
        if (!addresses.isEmpty()) {
            result.add(new Symptom(SYMPTOM_LARGE_WAIT_Q.text(), context.checkpoint(), addresses));
        }
    }
    return result;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) ArrayList(java.util.ArrayList) Symptom(com.microsoft.dhalion.core.Symptom) HashSet(java.util.HashSet)

Example 4 with MeasurementsTable

use of com.microsoft.dhalion.core.MeasurementsTable in project heron by twitter.

the class SlowInstanceDiagnoser method diagnose.

/**
 * Produces a slow instance diagnosis for the component reported by the single back pressure
 * symptom, provided that component carries a wait queue size skew symptom and no processing
 * rate skew symptom.
 *
 * @param symptoms symptoms produced by the detectors
 * @return a collection holding at most one slow instance diagnosis; empty when the
 * preconditions are not met or no instance qualifies
 * @throws IllegalStateException if more than one back pressure symptom is present
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
    publishingMetrics.executeDiagnoserIncr(SLOW_INSTANCE_DIAGNOSER);

    Collection<Diagnosis> diagnoses = new ArrayList<>();
    SymptomsTable allSymptoms = SymptomsTable.of(symptoms);
    SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());

    if (backPressure.size() == 0) {
        return diagnoses;
    }
    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    String target = backPressure.first().assignments().iterator().next();
    SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
    SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

    // Continue only when the back pressure component shows a wait queue size skew and does
    // not show a processing rate skew.
    if (queueSkew.assignment(target).size() == 0) {
        return diagnoses;
    }
    if (rateSkew.assignment(target).size() > 0) {
        // TODO in a short window rate skew could exist
        return diagnoses;
    }

    Instant windowEnd = context.checkpoint();
    Instant windowStart = context.previousCheckpoint();
    MeasurementsTable componentMetrics =
        context.measurements().between(windowStart, windowEnd).component(target);

    Collection<String> slowInstances = new ArrayList<>();
    for (String id : componentMetrics.uniqueInstances()) {
        double meanQueue =
            componentMetrics.instance(id).type(METRIC_WAIT_Q_SIZE.text()).mean();

        // The instance's mean queue size must be more than half of the component-wide maximum.
        boolean queueCloseToMax =
            componentMetrics.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueue * 2;
        if (!queueCloseToMax) {
            continue;
        }

        slowInstances.add(id);
        LOG.info(String.format("SLOW: %s back-pressure and high buffer size: %s "
            + "and similar processing rates", id, meanQueue));
    }

    if (!slowInstances.isEmpty()) {
        Instant diagnosedAt = context.checkpoint();
        diagnoses.add(new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), diagnosedAt, slowInstances));
    }
    return diagnoses;
}
Also used : MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Diagnosis(com.microsoft.dhalion.core.Diagnosis) SymptomsTable(com.microsoft.dhalion.core.SymptomsTable)

Example 5 with MeasurementsTable

use of com.microsoft.dhalion.core.MeasurementsTable in project heron by twitter.

the class BufferSizeSensor method fetch.

/**
 * Builds one buffer size measurement per bolt instance from the metrics reported for the
 * stream manager component.
 *
 * <p>For each bolt instance the per-instance metric name is formed from the sensor's metric
 * name, the instance name and the wait queue size suffix. The values returned for that metric
 * are summed into a single measurement stamped with the current checkpoint. Instances with no
 * returned measurements, or none belonging to the stream manager component, are skipped.
 *
 * @return buffer size measurements, one per bolt instance that had data
 */
@Override
public Collection<Measurement> fetch() {
    Instant checkpoint = context.checkpoint();
    List<String> bolts = physicalPlanProvider.getBoltNames();
    Duration window = getDuration();

    Collection<Measurement> collected = new ArrayList<>();
    for (String bolt : bolts) {
        for (String instanceName : packingPlanProvider.getBoltInstanceNames(bolt)) {
            String metricKey = getMetricName() + instanceName + MetricName.METRIC_WAIT_Q_SIZE_SUFFIX;
            Collection<Measurement> raw =
                metricsProvider.getMeasurements(checkpoint, window, metricKey, COMPONENT_STMGR);

            if (!raw.isEmpty()) {
                MeasurementsTable stmgrTable = MeasurementsTable.of(raw).component(COMPONENT_STMGR);
                if (stmgrTable.size() != 0) {
                    // Sum first, then build the measurement under the sensor's own metric name.
                    double summed = stmgrTable.type(metricKey).sum();
                    collected.add(
                        new Measurement(bolt, instanceName, getMetricName(), checkpoint, summed));
                }
            }
        }
    }
    return collected;
}
Also used : Measurement(com.microsoft.dhalion.core.Measurement) MeasurementsTable(com.microsoft.dhalion.core.MeasurementsTable) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Duration(java.time.Duration)

Aggregations

MeasurementsTable (com.microsoft.dhalion.core.MeasurementsTable)21 Measurement (com.microsoft.dhalion.core.Measurement)14 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)10 Instant (java.time.Instant)8 Symptom (com.microsoft.dhalion.core.Symptom)4 TopologyManager (org.apache.heron.proto.tmanager.TopologyManager)4 MetricsProvider (com.microsoft.dhalion.api.MetricsProvider)3 PhysicalPlanProvider (org.apache.heron.healthmgr.common.PhysicalPlanProvider)3 Diagnosis (com.microsoft.dhalion.core.Diagnosis)2 SymptomsTable (com.microsoft.dhalion.core.SymptomsTable)2 PoliciesExecutor (com.microsoft.dhalion.policy.PoliciesExecutor)2 Duration (java.time.Duration)2 HashSet (java.util.HashSet)2 PackingPlanProvider (org.apache.heron.healthmgr.common.PackingPlanProvider)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ExecutionContext (com.microsoft.dhalion.policy.PoliciesExecutor.ExecutionContext)1 HealthManagerMetrics (org.apache.heron.healthmgr.HealthManagerMetrics)1 HealthPolicyConfig (org.apache.heron.healthmgr.HealthPolicyConfig)1