Search in sources :

Example 1 with UpdateFailedEvent

use of com.sequenceiq.periscope.monitor.event.UpdateFailedEvent in project cloudbreak by hortonworks.

the class AmbariAgentHealthEvaluator method run.

/**
 * Polls the Ambari agent heartbeat alert for this evaluator's cluster and reports
 * every host whose alert state satisfies {@code isAlertStateMet} to Cloudbreak as a
 * failed node.
 *
 * <p>Any exception raised while creating the Ambari client, reading the alerts or
 * sending the failure report is logged and converted into an
 * {@link UpdateFailedEvent} for the cluster.
 */
@Override
public void run() {
    Cluster cluster = clusterService.find(clusterId);
    MDCBuilder.buildMdcContext(cluster);
    LOGGER.info("Checking '{}' alerts.", AMBARI_AGENT_HEARTBEAT);
    try {
        AmbariClient ambariClient = ambariClientProvider.createAmbariClient(cluster);
        List<Map<String, Object>> heartbeatAlerts = ambariClient.getAlert(AMBARI_AGENT_HEARTBEAT_DEF_NAME);

        // Collect the hosts whose heartbeat alert is in the state we react to.
        List<String> failedHosts = new ArrayList<>();
        for (Map<String, Object> alertEntry : heartbeatAlerts) {
            String state = (String) alertEntry.get(ALERT_STATE);
            if (!isAlertStateMet(state)) {
                continue;
            }
            String host = (String) alertEntry.get(HOST_NAME);
            failedHosts.add(host);
            LOGGER.info("Alert: {} is in '{}' state for host '{}'.", AMBARI_AGENT_HEARTBEAT, state, host);
        }

        // Nothing to recover: do not contact Cloudbreak at all.
        if (failedHosts.isEmpty()) {
            return;
        }

        for (String host : failedHosts) {
            LOGGER.info("Host to recover: {}", host);
        }
        FailureReport report = new FailureReport();
        report.setFailedNodes(failedHosts);
        CloudbreakClient cloudbreak = cloudbreakClientConfiguration.cloudbreakClient();
        cloudbreak.clusterEndpoint().failureReport(cluster.getStackId(), report);
    } catch (Exception e) {
        LOGGER.warn(String.format("Failed to retrieve '%s' alerts.", AMBARI_AGENT_HEARTBEAT), e);
        publishEvent(new UpdateFailedEvent(clusterId));
    }
}
Also used : CloudbreakClient(com.sequenceiq.cloudbreak.client.CloudbreakClient) FailureReport(com.sequenceiq.cloudbreak.api.model.FailureReport) UpdateFailedEvent(com.sequenceiq.periscope.monitor.event.UpdateFailedEvent) ArrayList(java.util.ArrayList) Cluster(com.sequenceiq.periscope.domain.Cluster) Map(java.util.Map) AmbariClient(com.sequenceiq.ambari.client.AmbariClient)

Example 2 with UpdateFailedEvent

use of com.sequenceiq.periscope.monitor.event.UpdateFailedEvent in project cloudbreak by hortonworks.

the class MetricEvaluator method run.

/**
 * Evaluates every metric based alert of this evaluator's cluster against the most
 * recent Ambari alert history entry and publishes a {@link ScalingEvent} for the
 * first alert whose state is met, whose period has been reached and which has a
 * policy attached.
 *
 * <p>Alerts that return more than one history entry are skipped. Any exception
 * raised while creating the Ambari client or processing the alerts is logged and
 * converted into an {@link UpdateFailedEvent} for the cluster.
 */
@Override
public void run() {
    Cluster cluster = clusterService.find(clusterId);
    MDCBuilder.buildMdcContext(cluster);
    try {
        // Created inside the try block so that a client creation failure is also
        // reported through UpdateFailedEvent instead of escaping from run().
        AmbariClient ambariClient = ambariClientProvider.createAmbariClient(cluster);
        for (MetricAlert alert : alertRepository.findAllByCluster(clusterId)) {
            String alertName = alert.getName();
            LOGGER.info("Checking metric based alert: '{}'", alertName);
            List<Map<String, Object>> alertHistory = ambariClient.getAlertHistory(alert.getDefinitionName(), 1);
            if (alertHistory.size() > 1) {
                LOGGER.debug("Multiple results found for alert: {}, probably HOST alert, ignoring now..", alertName);
                continue;
            }
            if (!alertHistory.isEmpty()) {
                Map<String, Object> history = alertHistory.get(0);
                String currentState = (String) history.get(ALERT_STATE);
                if (isAlertStateMet(currentState, alert)) {
                    long elapsedTime = getPeriod(history);
                    LOGGER.info("Alert: {} is in '{}' state since {} min(s)", alertName, currentState, ClusterUtils.TIME_FORMAT.format((double) elapsedTime / ClusterUtils.MIN_IN_MS));
                    if (isPeriodReached(alert, elapsedTime) && isPolicyAttached(alert)) {
                        publishEvent(new ScalingEvent(alert));
                        // Only one scaling event is published per run; remaining alerts are not checked.
                        break;
                    }
                }
            }
        }
    } catch (Exception e) {
        LOGGER.error("Failed to retrieve alert history", e);
        publishEvent(new UpdateFailedEvent(clusterId));
    }
}
Also used : UpdateFailedEvent(com.sequenceiq.periscope.monitor.event.UpdateFailedEvent) Cluster(com.sequenceiq.periscope.domain.Cluster) ScalingEvent(com.sequenceiq.periscope.monitor.event.ScalingEvent) MetricAlert(com.sequenceiq.periscope.domain.MetricAlert) Map(java.util.Map) AmbariClient(com.sequenceiq.ambari.client.AmbariClient)

Example 3 with UpdateFailedEvent

use of com.sequenceiq.periscope.monitor.event.UpdateFailedEvent in project cloudbreak by hortonworks.

the class PrometheusEvaluator method run.

/**
 * Queries Prometheus for every Prometheus based alert of this evaluator's cluster
 * and publishes a {@link ScalingEvent} for each alert that should trigger scaling
 * and has a policy attached.
 *
 * <p>Trigger rules, depending on the alert's configured state:
 * <ul>
 *   <li>{@code OK}: triggers when the query returns no results.</li>
 *   <li>{@code CRITICAL}: triggers when a result in the {@code "firing"} state has a
 *       last sample whose second element is a {@code String} other than {@code "0"};
 *       a {@code "0"} value stops the scan of the remaining results.</li>
 *   <li>any other state: never triggers.</li>
 * </ul>
 *
 * <p>Any exception is logged and converted into an {@link UpdateFailedEvent} for the
 * cluster.
 */
@Override
public void run() {
    try {
        Cluster cluster = clusterService.find(clusterId);
        MDCBuilder.buildMdcContext(cluster);
        TlsConfiguration tls = tlsSecurityService.getConfiguration(cluster);
        Client restClient = RestClientUtil.createClient(tls.getServerCert(), tls.getClientCert(), tls.getClientKey(), true, PrometheusEvaluator.class);
        WebTarget prometheus = restClient.target(String.format("https://%s:%s/prometheus", cluster.getAmbari().getHost(), cluster.getPort()));
        for (PrometheusAlert alert : alertRepository.findAllByCluster(clusterId)) {
            String alertName = alert.getName();
            LOGGER.info("Checking Prometheus based alert: '{}'", alertName);
            // The expression is URL-encoded before being handed to queryParam.
            // NOTE(review): presumably so the braces are not treated as URI template variables - confirm.
            String expression = String.format("ALERTS{alertname=\"%s\"}[%dm]", alertName, alert.getPeriod());
            String encodedQuery = URLEncoder.encode(expression, "UTF-8");
            WebTarget queryTarget = prometheus.path("/api/v1/query").queryParam("query", encodedQuery);
            Response httpResponse = queryTarget.request().header("Accept", MediaType.APPLICATION_JSON_VALUE).get();
            PrometheusResponse parsed = JaxRSUtil.response(httpResponse, PrometheusResponse.class);
            boolean shouldScale = false;
            switch (alert.getAlertState()) {
                case OK:
                    shouldScale = parsed.getData().getResult().isEmpty();
                    break;
                case CRITICAL:
                    for (Result entry : parsed.getData().getResult()) {
                        if (!"firing".equals(entry.getMetric().getAlertstate())) {
                            continue;
                        }
                        int lastIndex = entry.getValues().size() - 1;
                        List<Object> newestSample = entry.getValues().get(lastIndex);
                        Object sampleValue = newestSample.get(1);
                        if (!(sampleValue instanceof String)) {
                            continue;
                        }
                        if ("0".equals(sampleValue)) {
                            // Exits the for loop (not the switch): remaining results are not inspected.
                            break;
                        }
                        shouldScale = true;
                    }
                    break;
                default:
                    // Other alert states never trigger scaling; shouldScale stays false.
                    break;
            }
            if (shouldScale && isPolicyAttached(alert)) {
                publishEvent(new ScalingEvent(alert));
            }
        }
    } catch (Exception e) {
        LOGGER.error("Failed to retrieve alerts from Prometheus", e);
        publishEvent(new UpdateFailedEvent(clusterId));
    }
}
Also used : UpdateFailedEvent(com.sequenceiq.periscope.monitor.event.UpdateFailedEvent) PrometheusResponse(com.sequenceiq.periscope.model.PrometheusResponse) Cluster(com.sequenceiq.periscope.domain.Cluster) Result(com.sequenceiq.periscope.model.PrometheusResponse.Result) PrometheusResponse(com.sequenceiq.periscope.model.PrometheusResponse) Response(javax.ws.rs.core.Response) PrometheusAlert(com.sequenceiq.periscope.domain.PrometheusAlert) ScalingEvent(com.sequenceiq.periscope.monitor.event.ScalingEvent) TlsConfiguration(com.sequenceiq.periscope.model.TlsConfiguration) WebTarget(javax.ws.rs.client.WebTarget) Client(javax.ws.rs.client.Client)

Aggregations

Cluster (com.sequenceiq.periscope.domain.Cluster)3 UpdateFailedEvent (com.sequenceiq.periscope.monitor.event.UpdateFailedEvent)3 AmbariClient (com.sequenceiq.ambari.client.AmbariClient)2 ScalingEvent (com.sequenceiq.periscope.monitor.event.ScalingEvent)2 Map (java.util.Map)2 FailureReport (com.sequenceiq.cloudbreak.api.model.FailureReport)1 CloudbreakClient (com.sequenceiq.cloudbreak.client.CloudbreakClient)1 MetricAlert (com.sequenceiq.periscope.domain.MetricAlert)1 PrometheusAlert (com.sequenceiq.periscope.domain.PrometheusAlert)1 PrometheusResponse (com.sequenceiq.periscope.model.PrometheusResponse)1 Result (com.sequenceiq.periscope.model.PrometheusResponse.Result)1 TlsConfiguration (com.sequenceiq.periscope.model.TlsConfiguration)1 ArrayList (java.util.ArrayList)1 Client (javax.ws.rs.client.Client)1 WebTarget (javax.ws.rs.client.WebTarget)1 Response (javax.ws.rs.core.Response)1