Search in sources :

Example 16 with InstanceMetrics

use of com.microsoft.dhalion.metrics.InstanceMetrics in project incubator-heron by apache.

the class BufferSizeSensorTest method registerStMgrInstanceMetricResponse.

/**
 * Stubs {@code metricsProvider} so that a query for {@code metric} on the
 * {@code "__stmgr__"} component over {@code DEFAULT_METRIC_DURATION} returns a
 * single-component response holding one instance, {@code "stmgr-1"}, that carries
 * the supplied value.
 *
 * @param metricsProvider provider to stub (presumably a mock or spy, since it is
 *                        passed through {@code when(...)})
 * @param metric          name of the metric to register and to match on
 * @param value           value recorded for {@code metric} on the "stmgr-1" instance
 */
static void registerStMgrInstanceMetricResponse(MetricsProvider metricsProvider, String metric, long value) {
    // Build the response bottom-up: instance -> component -> result map.
    InstanceMetrics stmgrInstance = new InstanceMetrics("stmgr-1");
    stmgrInstance.addMetric(metric, value);

    ComponentMetrics stmgrComponent = new ComponentMetrics("__stmgr__");
    stmgrComponent.addInstanceMetric(stmgrInstance);

    Map<String, ComponentMetrics> stubbedResponse = new HashMap<>();
    stubbedResponse.put("__stmgr__", stmgrComponent);

    when(metricsProvider.getComponentMetrics(metric, DEFAULT_METRIC_DURATION, "__stmgr__"))
        .thenReturn(stubbedResponse);
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) HashMap(java.util.HashMap) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Example 17 with InstanceMetrics

use of com.microsoft.dhalion.metrics.InstanceMetrics in project incubator-heron by apache.

the class MetricsCacheMetricsProviderTest method testGetTimeLineMetrics.

/**
 * Checks that a metrics-cache response containing two task entries for the "bolt"
 * component is converted into one {@code ComponentMetrics} with two instances, and
 * that each interval value ends up keyed by the {@code Instant} built from its
 * interval start second.
 */
@Test
public void testGetTimeLineMetrics() {
    MetricsCacheMetricsProvider providerSpy = createMetricsProviderSpy();
    String metric = "count";
    String comp = "bolt";
    Instant windowStart = Instant.ofEpochSecond(10);
    Duration windowLength = Duration.ofSeconds(60);

    // First task: a single data point.
    TaskMetric.Builder firstTask = TaskMetric.newBuilder()
        .setInstanceId("container_1_bolt_1")
        .addMetric(IndividualMetric.newBuilder()
            .setName(metric)
            .addIntervalValues(IntervalValue.newBuilder()
                .setValue("104")
                .setInterval(MetricInterval.newBuilder().setStart(1497481288).setEnd(1497481288))));

    // Second task: three data points, deliberately not in chronological order.
    TaskMetric.Builder secondTask = TaskMetric.newBuilder()
        .setInstanceId("container_1_bolt_2")
        .addMetric(IndividualMetric.newBuilder()
            .setName(metric)
            .addIntervalValues(IntervalValue.newBuilder()
                .setValue("12")
                .setInterval(MetricInterval.newBuilder().setStart(1497481228).setEnd(1497481228)))
            .addIntervalValues(IntervalValue.newBuilder()
                .setValue("2")
                .setInterval(MetricInterval.newBuilder().setStart(1497481348).setEnd(1497481348)))
            .addIntervalValues(IntervalValue.newBuilder()
                .setValue("3")
                .setInterval(MetricInterval.newBuilder().setStart(1497481168).setEnd(1497481168))));

    TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.newBuilder()
        .setStatus(Status.newBuilder().setStatus(StatusCode.OK))
        .addMetric(firstTask)
        .addMetric(secondTask)
        .build();

    // Short-circuit the metrics-cache lookup so the canned response is returned.
    doReturn(response).when(providerSpy).getMetricsFromMetricsCache(metric, comp, windowStart, windowLength);

    Map<String, ComponentMetrics> metricsByComponent =
        providerSpy.getComponentMetrics(metric, windowStart, windowLength, comp);
    assertEquals(1, metricsByComponent.size());

    ComponentMetrics boltComponent = metricsByComponent.get(comp);
    assertNotNull(boltComponent);
    assertEquals(2, boltComponent.getMetrics().size());

    // Instance 1: exactly one value for the metric.
    InstanceMetrics firstInstance = boltComponent.getMetrics("container_1_bolt_1");
    assertNotNull(firstInstance);
    assertEquals(1, firstInstance.getMetrics().size());
    Map<Instant, Double> firstValues = firstInstance.getMetrics().get(metric);
    assertEquals(1, firstValues.size());
    assertEquals(104, firstValues.get(Instant.ofEpochSecond(1497481288)).intValue());

    // Instance 2: three values, each retrievable by its own timestamp.
    InstanceMetrics secondInstance = boltComponent.getMetrics("container_1_bolt_2");
    assertNotNull(secondInstance);
    assertEquals(1, secondInstance.getMetrics().size());
    Map<Instant, Double> secondValues = secondInstance.getMetrics().get(metric);
    assertEquals(3, secondValues.size());
    assertEquals(12, secondValues.get(Instant.ofEpochSecond(1497481228L)).intValue());
    assertEquals(2, secondValues.get(Instant.ofEpochSecond(1497481348L)).intValue());
    assertEquals(3, secondValues.get(Instant.ofEpochSecond(1497481168L)).intValue());
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) Instant(java.time.Instant) TopologyMaster(com.twitter.heron.proto.tmaster.TopologyMaster) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) Test(org.junit.Test)

Example 18 with InstanceMetrics

use of com.microsoft.dhalion.metrics.InstanceMetrics in project incubator-heron by apache.

the class DataSkewDiagnoser method diagnose.

/**
 * Produces a data skew diagnosis when the component reporting back pressure also
 * appears among the processing-rate-skew components and the wait-queue-disparity
 * components, and at least one of its back-pressured instances has an execute
 * count and a buffer size close to the component-wide maxima.
 *
 * @param symptoms symptoms to evaluate
 * @return a {@code Diagnosis} named by {@code DIAGNOSIS_DATA_SKEW}, or {@code null}
 *         when the symptoms do not point to data skew
 * @throws IllegalStateException if more than one back-pressure symptom is present
 */
@Override
public Diagnosis diagnose(List<Symptom> symptoms) {
    List<Symptom> backPressureSymptoms = getBackPressureSymptoms(symptoms);
    Map<String, ComponentMetrics> rateSkewByComponent = getProcessingRateSkewComponents(symptoms);
    Map<String, ComponentMetrics> queueDisparityByComponent = getWaitQDisparityComponents(symptoms);

    // All three kinds of evidence are required; without any one of them no action is needed.
    if (backPressureSymptoms.isEmpty() || rateSkewByComponent.isEmpty() || queueDisparityByComponent.isEmpty()) {
        return null;
    }
    if (backPressureSymptoms.size() > 1) {
        // TODO handle cases where multiple detectors create back pressure symptom
        throw new IllegalStateException("Multiple back-pressure symptoms case");
    }

    ComponentMetrics bpMetrics = backPressureSymptoms.iterator().next().getComponent();
    String bpComponentName = bpMetrics.getName();

    // The skew and the queue disparity must be reported for the same component
    // that is under back pressure; otherwise this is not a data skew case.
    ComponentMetrics exeCountMetrics = rateSkewByComponent.get(bpComponentName);
    ComponentMetrics pendingBufferMetrics = queueDisparityByComponent.get(bpMetrics.getName());
    if (exeCountMetrics == null || pendingBufferMetrics == null) {
        return null;
    }

    ComponentMetrics mergedData =
        ComponentMetrics.merge(bpMetrics, ComponentMetrics.merge(exeCountMetrics, pendingBufferMetrics));
    ComponentMetricsHelper helper = new ComponentMetricsHelper(mergedData);
    helper.computeBpStats();
    MetricsStats exeStats = helper.computeMinMaxStats(METRIC_EXE_COUNT);
    MetricsStats bufferStats = helper.computeMinMaxStats(METRIC_BUFFER_SIZE);

    Symptom dataSkewSymptom = null;
    for (InstanceMetrics instance : helper.getBoltsWithBackpressure()) {
        double exeCount = instance.getMetricValueSum(METRIC_EXE_COUNT.text());
        double bufferSize = instance.getMetricValueSum(METRIC_BUFFER_SIZE.text());
        double bpValue = instance.getMetricValueSum(METRIC_BACK_PRESSURE.text());

        // The instance qualifies when the maximum execute count is below 1.10x its own
        // count and the maximum buffer size is below 2x its own buffer size.
        if (exeStats.getMetricMax() < 1.10 * exeCount && bufferStats.getMetricMax() < 2 * bufferSize) {
            String message = String.format(
                "DataSkew: %s back-pressure(%s), high execution count: %s and "
                    + "high buffer size %s",
                instance.getName(), bpValue, exeCount, bufferSize);
            LOG.info(message);
            dataSkewSymptom = new Symptom(SYMPTOM_DATA_SKEW.text(), mergedData);
        }
    }

    if (dataSkewSymptom == null) {
        return null;
    }
    return new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), dataSkewSymptom);
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) ComponentMetricsHelper(com.twitter.heron.healthmgr.common.ComponentMetricsHelper) Diagnosis(com.microsoft.dhalion.diagnoser.Diagnosis) Symptom(com.microsoft.dhalion.detector.Symptom) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) MetricsStats(com.twitter.heron.healthmgr.common.MetricsStats)

Example 19 with InstanceMetrics

use of com.microsoft.dhalion.metrics.InstanceMetrics in project incubator-heron by apache.

the class TrackerMetricsProvider method getComponentMetrics.

/**
 * Fetches {@code metric} for each requested component and returns the parsed
 * results keyed by component name. Components are queried one at a time, in the
 * order given; a name that appears more than once is queried again and its later
 * result replaces the earlier entry.
 *
 * @param metric     name of the metric to fetch
 * @param startTime  start of the query window, passed through to the tracker query
 * @param duration   length of the query window, passed through to the tracker query
 * @param components names of the components to fetch the metric for
 * @return map from component name to the metrics parsed for that component
 */
@Override
public Map<String, ComponentMetrics> getComponentMetrics(String metric, Instant startTime, Duration duration, String... components) {
    Map<String, ComponentMetrics> metricsByComponent = new HashMap<>();
    for (int i = 0; i < components.length; i++) {
        String name = components[i];
        String trackerResponse = getMetricsFromTracker(metric, name, startTime, duration);
        Map<String, InstanceMetrics> instanceMetrics = parse(trackerResponse, name, metric);
        metricsByComponent.put(name, new ComponentMetrics(name, instanceMetrics));
    }
    return metricsByComponent;
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) HashMap(java.util.HashMap) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics)

Example 20 with InstanceMetrics

use of com.microsoft.dhalion.metrics.InstanceMetrics in project incubator-heron by apache.

the class ComponentMetricsHelperTest method detectsMultipleCompIncreasingBuffer.

/**
 * Feeds two instances of the "bolt" component steadily growing buffer sizes and
 * checks the growth rates computed by
 * {@code ComponentMetricsHelper.computeBufferSizeTrend()}: the maximum rate
 * reported by the helper, and the growth-rate metric stored on each instance.
 */
@Test
public void detectsMultipleCompIncreasingBuffer() {
    ComponentMetrics boltMetrics = new ComponentMetrics("bolt");

    // Instance i1: five samples 60 seconds apart, growing by 300 per sample.
    Map<Instant, Double> firstBufferSizes = new HashMap<>();
    firstBufferSizes.put(Instant.ofEpochSecond(1497892210), 0.0);
    firstBufferSizes.put(Instant.ofEpochSecond(1497892270), 300.0);
    firstBufferSizes.put(Instant.ofEpochSecond(1497892330), 600.0);
    firstBufferSizes.put(Instant.ofEpochSecond(1497892390), 900.0);
    firstBufferSizes.put(Instant.ofEpochSecond(1497892450), 1200.0);
    InstanceMetrics firstInstance = new InstanceMetrics("i1");
    firstInstance.addMetric(METRIC_BUFFER_SIZE.text(), firstBufferSizes);
    boltMetrics.addInstanceMetric(firstInstance);

    // Instance i2: four samples 60 seconds apart, growing by 180 per sample.
    Map<Instant, Double> secondBufferSizes = new HashMap<>();
    secondBufferSizes.put(Instant.ofEpochSecond(1497892270), 0.0);
    secondBufferSizes.put(Instant.ofEpochSecond(1497892330), 180.0);
    secondBufferSizes.put(Instant.ofEpochSecond(1497892390), 360.0);
    secondBufferSizes.put(Instant.ofEpochSecond(1497892450), 540.0);
    InstanceMetrics secondInstance = new InstanceMetrics("i2");
    secondInstance.addMetric(METRIC_BUFFER_SIZE.text(), secondBufferSizes);
    boltMetrics.addInstanceMetric(secondInstance);

    ComponentMetricsHelper trendHelper = new ComponentMetricsHelper(boltMetrics);
    trendHelper.computeBufferSizeTrend();

    // The largest rate across both instances is the one expected for i1.
    assertEquals(5, trendHelper.getMaxBufferChangeRate(), 0.1);

    Map<String, InstanceMetrics> byInstance = boltMetrics.getMetrics();
    InstanceMetrics i1 = byInstance.get("i1");
    assertEquals(1, i1.getMetrics().get(METRIC_WAIT_Q_GROWTH_RATE.text()).size());
    assertEquals(5, i1.getMetricValueSum(METRIC_WAIT_Q_GROWTH_RATE.text()), 0.1);
    assertEquals(3, byInstance.get("i2").getMetricValueSum(METRIC_WAIT_Q_GROWTH_RATE.text()), 0.1);
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) Instant(java.time.Instant) ComponentMetrics(com.microsoft.dhalion.metrics.ComponentMetrics) Test(org.junit.Test)

Aggregations

InstanceMetrics (com.microsoft.dhalion.metrics.InstanceMetrics)24 ComponentMetrics (com.microsoft.dhalion.metrics.ComponentMetrics)15 HashMap (java.util.HashMap)11 Test (org.junit.Test)8 Symptom (com.microsoft.dhalion.detector.Symptom)7 Instant (java.time.Instant)7 HealthPolicyConfig (com.twitter.heron.healthmgr.HealthPolicyConfig)4 Diagnosis (com.microsoft.dhalion.diagnoser.Diagnosis)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ComponentMetricsHelper (com.twitter.heron.healthmgr.common.ComponentMetricsHelper)2 MetricsStats (com.twitter.heron.healthmgr.common.MetricsStats)2 BufferSizeSensor (com.twitter.heron.healthmgr.sensors.BufferSizeSensor)2 ExecuteCountSensor (com.twitter.heron.healthmgr.sensors.ExecuteCountSensor)2 TopologyMaster (com.twitter.heron.proto.tmaster.TopologyMaster)2 DocumentContext (com.jayway.jsonpath.DocumentContext)1 Action (com.microsoft.dhalion.resolver.Action)1 ContainerRestart (com.twitter.heron.healthmgr.common.HealthManagerEvents.ContainerRestart)1 MetricInterval (com.twitter.heron.proto.tmaster.TopologyMaster.MetricInterval)1 IndividualMetric (com.twitter.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric)1 IntervalValue (com.twitter.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric.IntervalValue)1