Search in sources :

Example 11 with ComponentMetrics

use of com.microsoft.dhalion.metrics.ComponentMetrics in project incubator-heron by apache.

the class MetricsCacheMetricsProvider method getComponentMetrics.

/**
 * Fetches the given metric from the metrics cache for each requested component.
 *
 * @param metric name of the metric to fetch
 * @param startTime start time handed to the metrics cache query and to the response parser
 * @param duration duration handed to the metrics cache query
 * @param components names of the components to query
 * @return a map keyed by component name; a name passed more than once keeps its last result
 */
@Override
public Map<String, ComponentMetrics> getComponentMetrics(String metric, Instant startTime, Duration duration, String... components) {
    final Map<String, ComponentMetrics> metricsByComponent = new HashMap<>();
    for (int i = 0; i < components.length; i++) {
        final String name = components[i];
        // One cache round trip per component; the raw response is then turned into
        // per-instance metrics before being wrapped for the caller.
        final TopologyMaster.MetricResponse cacheResponse = getMetricsFromMetricsCache(metric, name, startTime, duration);
        final Map<String, InstanceMetrics> perInstance = parse(cacheResponse, name, metric, startTime);
        metricsByComponent.put(name, new ComponentMetrics(name, perInstance));
    }
    return metricsByComponent;
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) HashMap(java.util.HashMap) TopologyMaster(com.twitter.heron.proto.tmaster.TopologyMaster) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Example 12 with ComponentMetrics

use of com.microsoft.dhalion.metrics.ComponentMetrics in project incubator-heron by apache.

the class SkewDetector method detect.

/**
 * Looks for components showing data skew: the maximum of the sensor's metric across the
 * component is more than {@code skewRatio} times the minimum.
 *
 * @return one symptom per skewed component; an empty list when no component is skewed
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : sensor.get().values()) {
        MetricsStats minMax = new ComponentMetricsHelper(component).computeMinMaxStats(sensor.getMetricName());
        boolean skewed = minMax.getMetricMax() > skewRatio * minMax.getMetricMin();
        if (!skewed) {
            continue;
        }
        LOG.info(String.format("Detected skew for %s, min = %f, max = %f", component.getName(), minMax.getMetricMin(), minMax.getMetricMax()));
        symptoms.add(new Symptom(symptomName.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 13 with ComponentMetrics

use of com.microsoft.dhalion.metrics.ComponentMetrics in project incubator-heron by apache.

the class BackPressureSensor method get.

/**
 * Computes the average (millis/sec) back-pressure caused by each bolt instance in the configured
 * window.
 *
 * <p>For every bolt instance, the instance-specific back-pressure metric is requested from the
 * stream manager component, the values reported for it are summed, and the sum is divided by the
 * window length in seconds. Bolt instances for which the stream manager component returns no
 * metrics are left out of the result.
 *
 * @return a map from bolt component name to its metrics, holding one entry per bolt instance
 *     for which stream manager data was available
 */
public Map<String, ComponentMetrics> get() {
    // Upper bound for a meaningful reading: 1000 millis of back-pressure per second.
    final double maxBpMillisPerSec = 1000.0;

    Map<String, ComponentMetrics> result = new HashMap<>();
    String[] boltComponents = topologyProvider.getBoltNames();
    for (String boltComponent : boltComponents) {
        String[] boltInstanceNames = packingPlanProvider.getBoltInstanceNames(boltComponent);
        Duration duration = getDuration();
        Map<String, InstanceMetrics> instanceMetrics = new HashMap<>();
        for (String boltInstanceName : boltInstanceNames) {
            String metric = getMetricName() + boltInstanceName;
            ComponentMetrics stmgrMetrics =
                metricsProvider.getComponentMetrics(metric, duration, COMPONENT_STMGR).get(COMPONENT_STMGR);
            if (stmgrMetrics == null) {
                continue;
            }
            Map<String, InstanceMetrics> streamManagerResult = stmgrMetrics.getMetrics();
            if (streamManagerResult.isEmpty()) {
                continue;
            }
            // since a bolt instance belongs to one stream manager,
            // for tracker rest api: expect just one metrics manager instance in the result;
            // for tmaster/metricscache stat interface: expect a list
            double valueSum = 0.0;
            for (InstanceMetrics stmgrInstanceResult : streamManagerResult.values()) {
                Double val = stmgrInstanceResult.getMetricValueSum(metric);
                if (val != null) {
                    valueSum += val;
                }
            }
            // NOTE(review): Duration.getSeconds() is 0 for sub-second windows, which makes this
            // division yield Infinity or NaN; confirm getDuration() is always >= 1 second.
            double averageBp = valueSum / duration.getSeconds();
            // The maximum value of averageBp should be 1000, i.e. 1000 millis of BP per second. Due to
            // a bug in Heron (Issue: 1753), this value could be higher in some cases. The following
            // check partially corrects the reported BP value
            averageBp = Math.min(averageBp, maxBpMillisPerSec);
            InstanceMetrics boltInstanceMetric = new InstanceMetrics(boltInstanceName, getMetricName(), averageBp);
            instanceMetrics.put(boltInstanceName, boltInstanceMetric);
        }
        ComponentMetrics componentMetrics = new ComponentMetrics(boltComponent, instanceMetrics);
        result.put(boltComponent, componentMetrics);
    }
    return result;
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) HashMap(java.util.HashMap) Duration(java.time.Duration) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Example 14 with ComponentMetrics

use of com.microsoft.dhalion.metrics.ComponentMetrics in project incubator-heron by apache.

the class LargeWaitQueueDetector method detect.

/**
 * Finds components that cannot keep up with their input load. A component is reported when
 * even the smallest buffer size statistic computed for it is above {@code sizeLimit}.
 *
 * @return one symptom per component with a large wait queue; empty when there are none
 */
@Override
public List<Symptom> detect() {
    List<Symptom> symptoms = new ArrayList<>();
    for (ComponentMetrics component : pendingBufferSensor.get().values()) {
        MetricsStats bufferStats = new ComponentMetricsHelper(component).computeMinMaxStats(METRIC_BUFFER_SIZE.text());
        if (!(bufferStats.getMetricMin() > sizeLimit)) {
            // The smallest queue is within the limit, so this component is not reported.
            continue;
        }
        LOG.info(String.format("Detected large wait queues for %s, smallest queue is %f", component.getName(), bufferStats.getMetricMin()));
        symptoms.add(new Symptom(SYMPTOM_LARGE_WAIT_Q.text(), component));
    }
    return symptoms;
}
Also used : ArrayList(java.util.ArrayList) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 15 with ComponentMetrics

use of com.microsoft.dhalion.metrics.ComponentMetrics in project incubator-heron by apache.

the class BufferSizeSensorTest method providesBufferSizeMetricForBolts.

/**
 * Checks that the buffer size sensor returns metrics grouped by bolt, with one entry per bolt
 * instance. The value registered for each instance is the length of its id, and the assertions
 * expect that same value back.
 */
@Test
public void providesBufferSizeMetricForBolts() {
    String[] boltIds = new String[] { "container_1_bolt-1_1", "container_2_bolt-2_22", "container_1_bolt-2_333" };

    // Topology with two bolts: "bolt-1" has one instance, "bolt-2" has two.
    TopologyProvider topology = mock(TopologyProvider.class);
    when(topology.getBoltNames()).thenReturn(new String[] { "bolt-1", "bolt-2" });
    PackingPlanProvider packingPlan = mock(PackingPlanProvider.class);
    when(packingPlan.getBoltInstanceNames("bolt-1")).thenReturn(new String[] { boltIds[0] });
    when(packingPlan.getBoltInstanceNames("bolt-2")).thenReturn(new String[] { boltIds[1], boltIds[2] });

    MetricsProvider metricsProvider = mock(MetricsProvider.class);
    for (int i = 0; i < boltIds.length; i++) {
        String instanceMetric = MetricName.METRIC_BUFFER_SIZE + boltIds[i] + MetricName.METRIC_BUFFER_SIZE_SUFFIX;
        registerStMgrInstanceMetricResponse(metricsProvider, instanceMetric, boltIds[i].length());
    }

    BufferSizeSensor sensorUnderTest = new BufferSizeSensor(null, packingPlan, topology, metricsProvider);
    Map<String, ComponentMetrics> metricsByBolt = sensorUnderTest.get();
    assertEquals(2, metricsByBolt.size());

    ComponentMetrics firstBolt = metricsByBolt.get("bolt-1");
    assertEquals(1, firstBolt.getMetrics().size());
    assertEquals(boltIds[0].length(), firstBolt.getMetrics(boltIds[0]).getMetricValueSum(MetricName.METRIC_BUFFER_SIZE.text()).intValue());

    ComponentMetrics secondBolt = metricsByBolt.get("bolt-2");
    assertEquals(2, secondBolt.getMetrics().size());
    assertEquals(boltIds[1].length(), secondBolt.getMetrics(boltIds[1]).getMetricValueSum(MetricName.METRIC_BUFFER_SIZE.text()).intValue());
    assertEquals(boltIds[2].length(), secondBolt.getMetrics(boltIds[2]).getMetricValueSum(MetricName.METRIC_BUFFER_SIZE.text()).intValue());
}
Also used : TopologyProvider(com.twitter.heron.healthmgr.common.TopologyProvider) PackingPlanProvider(com.twitter.heron.healthmgr.common.PackingPlanProvider) MetricsProvider(com.microsoft.dhalion.api.MetricsProvider) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) Test(org.junit.Test)

Aggregations

ComponentMetrics (com.microsoft.dhalion.metrics.ComponentMetrics): 29; Symptom (com.microsoft.dhalion.detector.Symptom): 16; InstanceMetrics (com.microsoft.dhalion.metrics.InstanceMetrics): 15; Test (org.junit.Test): 15; HashMap (java.util.HashMap): 14; ComponentMetricsHelper (com.twitter.heron.healthmgr.common.ComponentMetricsHelper): 7; Diagnosis (com.microsoft.dhalion.diagnoser.Diagnosis): 6; HealthPolicyConfig (com.twitter.heron.healthmgr.HealthPolicyConfig): 6; ArrayList (java.util.ArrayList): 6; MetricsStats (com.twitter.heron.healthmgr.common.MetricsStats): 4; Instant (java.time.Instant): 4; MetricsProvider (com.microsoft.dhalion.api.MetricsProvider): 3; PackingPlanProvider (com.twitter.heron.healthmgr.common.PackingPlanProvider): 3; TopologyProvider (com.twitter.heron.healthmgr.common.TopologyProvider): 3; BufferSizeSensor (com.twitter.heron.healthmgr.sensors.BufferSizeSensor): 3; Action (com.microsoft.dhalion.resolver.Action): 2; ExecuteCountSensor (com.twitter.heron.healthmgr.sensors.ExecuteCountSensor): 2; TopologyMaster (com.twitter.heron.proto.tmaster.TopologyMaster): 2; PackingPlan (com.twitter.heron.spi.packing.PackingPlan): 2; TopologyAPI (com.twitter.heron.api.generated.TopologyAPI): 1