Search in sources :

Example 1 with ComponentMetricsHelper

use of com.twitter.heron.healthmgr.common.ComponentMetricsHelper in project incubator-heron by apache.

the class GrowingWaitQueueDetector method detect.

/**
 * Walks over every component reported by {@code pendingBufferSensor} and flags the ones whose
 * maximum buffer change rate is greater than {@code rateLimit}.
 *
 * <p>For each component a {@link ComponentMetricsHelper} is created and
 * {@code computeBufferSizeTrend()} is invoked on it before the rate is read. Each flagged
 * component is logged and wrapped in a {@code SYMPTOM_GROWING_WAIT_Q} symptom.
 *
 * @return the symptoms created during this pass, one per flagged component; empty if none
 *     exceeded the limit
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : pendingBufferSensor.get().values()) {
        ComponentMetricsHelper helper = new ComponentMetricsHelper(component);
        helper.computeBufferSizeTrend();
        // Negated ">" (instead of "<=") so that a NaN rate is skipped, never flagged.
        if (!(helper.getMaxBufferChangeRate() > rateLimit)) {
            continue;
        }
        String message = String.format("Detected growing wait queues for %s, max rate %f", component.getName(), helper.getMaxBufferChangeRate());
        LOG.info(message);
        symptoms.add(new Symptom(SYMPTOM_GROWING_WAIT_Q.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Example 2 with ComponentMetricsHelper

use of com.twitter.heron.healthmgr.common.ComponentMetricsHelper in project incubator-heron by apache.

the class SlowInstanceDiagnoser method diagnose.

/**
 * Decides whether the given symptoms amount to a slow-instance diagnosis.
 *
 * <p>The method gives up early (returning {@code null}) when there is no back-pressure symptom,
 * no component with a wait-queue disparity, or at least one component with a processing-rate
 * skew. Past that gate it requires a single back-pressure symptom whose component also appears
 * among the wait-queue-disparity components. The two metric sets are merged, and every bolt
 * returned by {@code getBoltsWithBackpressure()} is checked: a bolt whose summed buffer size,
 * doubled, is greater than the maximum buffer size across the merged component is logged and
 * yields a {@code SYMPTOM_SLOW_INSTANCE} symptom.
 *
 * @param symptoms the symptoms to inspect
 * @return a {@code DIAGNOSIS_SLOW_INSTANCE} diagnosis wrapping the slow-instance symptom, or
 *     {@code null} when no slow instance was identified
 * @throws IllegalStateException if the early-exit gate is passed and more than one
 *     back-pressure symptom is present
 */
@Override
public Diagnosis diagnose(List<Symptom> symptoms) {
    List<Symptom> backPressure = getBackPressureSymptoms(symptoms);
    Map<String, ComponentMetrics> rateSkewed = getProcessingRateSkewComponents(symptoms);
    Map<String, ComponentMetrics> queueDisparity = getWaitQDisparityComponents(symptoms);

    // A slow instance needs back pressure plus a wait-queue disparity, without any
    // processing-rate skew; otherwise no action is needed.
    boolean slowInstancePossible = !backPressure.isEmpty() && !queueDisparity.isEmpty() && rateSkewed.isEmpty();
    if (!slowInstancePossible) {
        return null;
    }
    if (backPressure.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    ComponentMetrics bpComponent = backPressure.iterator().next().getComponent();
    // The wait-queue disparity must be reported for the very component showing back pressure.
    ComponentMetrics queueMetrics = queueDisparity.get(bpComponent.getName());
    if (queueMetrics == null) {
        // Back pressure and disparity belong to different components: no slow instance.
        return null;
    }

    ComponentMetrics merged = ComponentMetrics.merge(bpComponent, queueMetrics);
    ComponentMetricsHelper helper = new ComponentMetricsHelper(merged);
    helper.computeBpStats();
    MetricsStats bufferStats = helper.computeMinMaxStats(METRIC_BUFFER_SIZE);

    Symptom slowInstance = null;
    for (InstanceMetrics bolt : helper.getBoltsWithBackpressure()) {
        double bufferSize = bolt.getMetricValueSum(METRIC_BUFFER_SIZE.text());
        double bpValue = bolt.getMetricValueSum(METRIC_BACK_PRESSURE.text());
        // Negated "<" (instead of ">=") so that NaN values take the same path as before.
        if (!(bufferStats.getMetricMax() < bufferSize * 2)) {
            continue;
        }
        String message = String.format("SLOW: %s back-pressure(%s) and high buffer size: %s and similar processing rates", bolt.getName(), bpValue, bufferSize);
        LOG.info(message);
        slowInstance = new Symptom(SYMPTOM_SLOW_INSTANCE.text(), merged);
    }

    if (slowInstance == null) {
        return null;
    }
    return new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), slowInstance);
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Diagnosis(com.microsoft.dhalion.diagnoser.Diagnosis) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 3 with ComponentMetricsHelper

use of com.twitter.heron.healthmgr.common.ComponentMetricsHelper in project incubator-heron by apache.

the class SkewDetector method detect.

/**
 * Flags every component reported by {@code sensor} whose maximum value of the sensor's metric
 * is greater than {@code skewRatio} times its minimum value.
 *
 * <p>The min/max statistics come from {@link ComponentMetricsHelper#computeMinMaxStats} for the
 * metric named by {@code sensor.getMetricName()}. Each flagged component is logged and wrapped
 * in a symptom named by {@code symptomName}.
 *
 * @return the symptoms created during this pass, one per flagged component; empty if no
 *     component was flagged
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : sensor.get().values()) {
        MetricsStats minMax = new ComponentMetricsHelper(component).computeMinMaxStats(sensor.getMetricName());
        // Negated ">" (instead of "<=") so that NaN statistics are skipped, never flagged.
        if (!(minMax.getMetricMax() > skewRatio * minMax.getMetricMin())) {
            continue;
        }
        String message = String.format("Detected skew for %s, min = %f, max = %f", component.getName(), minMax.getMetricMin(), minMax.getMetricMax());
        LOG.info(message);
        symptoms.add(new Symptom(symptomName.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 4 with ComponentMetricsHelper

use of com.twitter.heron.healthmgr.common.ComponentMetricsHelper in project incubator-heron by apache.

the class LargeWaitQueueDetector method detect.

/**
 * Flags every component reported by {@code pendingBufferSensor} whose smallest
 * {@code METRIC_BUFFER_SIZE} value is greater than {@code sizeLimit}.
 *
 * <p>The minimum is taken from the statistics returned by
 * {@link ComponentMetricsHelper#computeMinMaxStats}. Each flagged component is logged and
 * wrapped in a {@code SYMPTOM_LARGE_WAIT_Q} symptom.
 *
 * @return the symptoms created during this pass, one per flagged component; empty if no
 *     component was flagged
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : pendingBufferSensor.get().values()) {
        MetricsStats queueStats = new ComponentMetricsHelper(component).computeMinMaxStats(METRIC_BUFFER_SIZE.text());
        // Negated ">" (instead of "<=") so that a NaN minimum is skipped, never flagged.
        if (!(queueStats.getMetricMin() > sizeLimit)) {
            continue;
        }
        String message = String.format("Detected large wait queues for %s, smallest queue is %f", component.getName(), queueStats.getMetricMin());
        LOG.info(message);
        symptoms.add(new Symptom(SYMPTOM_LARGE_WAIT_Q.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 5 with ComponentMetricsHelper

use of com.twitter.heron.healthmgr.common.ComponentMetricsHelper in project incubator-heron by apache.

the class BackPressureDetector method detect.

/**
 * Flags every component reported by {@code bpSensor} whose total back pressure is greater than
 * {@code noiseFilterMillis}.
 *
 * <p>For each component a {@link ComponentMetricsHelper} is created and {@code computeBpStats()}
 * is invoked on it before the total is read. Each flagged component is logged and wrapped in a
 * {@code SYMPTOM_BACK_PRESSURE} symptom.
 *
 * @return the symptoms created during this pass, one per flagged component; empty if no
 *     component was flagged
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : bpSensor.get().values()) {
        ComponentMetricsHelper helper = new ComponentMetricsHelper(component);
        helper.computeBpStats();
        // Negated ">" (instead of "<=") so that a NaN total is skipped, never flagged.
        if (!(helper.getTotalBackpressure() > noiseFilterMillis)) {
            continue;
        }
        String message = String.format("Detected back pressure for %s, total back pressure is %f", component.getName(), helper.getTotalBackpressure());
        LOG.info(message);
        symptoms.add(new Symptom(SYMPTOM_BACK_PRESSURE.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Aggregations

Symptom (com.microsoft.dhalion.detector.Symptom): 7, ComponentMetrics (com.microsoft.dhalion.metrics.ComponentMetrics): 7, ComponentMetricsHelper (com.twitter.heron.healthmgr.common.ComponentMetricsHelper): 7, MetricsStats (com.twitter.heron.healthmgr.common.MetricsStats): 4, ArrayList (java.util.ArrayList): 4, Diagnosis (com.microsoft.dhalion.diagnoser.Diagnosis): 3, InstanceMetrics (com.microsoft.dhalion.metrics.InstanceMetrics): 2