Search in sources:

Example 1 with Alert

Use of io.cdap.cdap.etl.api.Alert in project cdap by caskdata.

From the class BaseRDDCollection, method publishAlerts:

@Override
public void publishAlerts(StageSpec stageSpec, StageStatisticsCollector collector) throws Exception {
    // Instantiate the alert publisher plugin configured for this stage.
    PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
    AlertPublisher alertPublisher = pluginFunctionContext.createPlugin();
    // Build the runtime context (messaging and admin services) and initialize the plugin.
    PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
    AlertPublisherContext alertPublisherContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
    alertPublisher.initialize(alertPublisherContext);
    // Collect the alert RDD to the driver, wrapping the iterator so records-in metrics are emitted as alerts are consumed.
    StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageSpec.getName());
    TrackedIterator<Alert> trackedAlerts = new TrackedIterator<>(((JavaRDD<Alert>) rdd).collect().iterator(), stageMetrics, Constants.Metrics.RECORDS_IN);
    alertPublisher.publish(trackedAlerts);
    alertPublisher.destroy();
}
Also used : PluginFunctionContext(io.cdap.cdap.etl.spark.function.PluginFunctionContext) AlertPublisher(io.cdap.cdap.etl.api.AlertPublisher) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) SparkPipelineRuntime(io.cdap.cdap.etl.spark.SparkPipelineRuntime) TrackedIterator(io.cdap.cdap.etl.common.TrackedIterator) Alert(io.cdap.cdap.etl.api.Alert) AlertPublisherContext(io.cdap.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(io.cdap.cdap.etl.common.DefaultAlertPublisherContext) StageMetrics(io.cdap.cdap.etl.api.StageMetrics) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) JavaRDD(org.apache.spark.api.java.JavaRDD)
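
The method above drives the full publisher lifecycle: create the plugin, initialize it with a context, hand it a metrics-tracking iterator of alerts, and destroy it. For orientation, here is a minimal sketch of a publisher on the other side of that contract. It is illustrative only, not a plugin from the cdap codebase; the class shape (extending AlertPublisher with initialize/publish/destroy) and the Alert accessors getStageName()/getPayload() are assumptions consistent with the snippets on this page.

// Illustrative sketch, not part of cdap: a publisher that only logs alerts.
// It relies on the lifecycle driven above: initialize -> publish -> destroy.
import java.util.Iterator;

import io.cdap.cdap.etl.api.Alert;
import io.cdap.cdap.etl.api.AlertPublisher;
import io.cdap.cdap.etl.api.AlertPublisherContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingAlertPublisher extends AlertPublisher {
    private static final Logger LOG = LoggerFactory.getLogger(LoggingAlertPublisher.class);

    @Override
    public void initialize(AlertPublisherContext context) throws Exception {
        // A real publisher would grab messaging or admin services from the context here.
    }

    @Override
    public void publish(Iterator<Alert> alerts) throws Exception {
        while (alerts.hasNext()) {
            Alert alert = alerts.next();
            LOG.info("Alert from stage {}: {}", alert.getStageName(), alert.getPayload());
        }
    }

    @Override
    public void destroy() {
        // Nothing to clean up for a logging-only publisher.
    }
}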

Example 2 with Alert

Use of io.cdap.cdap.etl.api.Alert in project cdap by caskdata.

From the class DataPipelineTest, method testAlertPublisher:

private void testAlertPublisher(Engine engine) throws Exception {
    String sourceName = "alertSource" + engine.name();
    String sinkName = "alertSink" + engine.name();
    String topic = "alertTopic" + engine.name();
    /*
     * source --> nullAlert --> sink
     *               |
     *               |--> TMS publisher
     */
    ETLBatchConfig config = ETLBatchConfig.builder()
        .setEngine(engine)
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
        .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addStage(new ETLStage("tms alert", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
        .addConnection("source", "nullAlert")
        .addConnection("nullAlert", "sink")
        .addConnection("nullAlert", "tms alert")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
    StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
    DataSetManager<Table> sourceTable = getDataset(sourceName);
    MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.start();
    manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    DataSetManager<Table> sinkTable = getDataset(sinkName);
    Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
    Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
    Assert.assertEquals(expected, actual);
    MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
    Set<Alert> actualMessages = new HashSet<>();
    try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
        while (iter.hasNext()) {
            Message message = iter.next();
            Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
            actualMessages.add(alert);
        }
    }
    Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<>()));
    Assert.assertEquals(expectedMessages, actualMessages);
    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "nullAlert.records.in");
    validateMetric(2, appId, "nullAlert.records.out");
    validateMetric(1, appId, "nullAlert.records.alert");
    validateMetric(2, appId, "sink.records.in");
    validateMetric(1, appId, "tms alert.records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Message(io.cdap.cdap.api.messaging.Message) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Alert(io.cdap.cdap.etl.api.Alert) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)
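
NullAlertTransform, used as the "nullAlert" stage above, routes records whose id field is null to the alert port and passes the rest through, which is why the sink sees two records while one alert reaches TMS. A hedged sketch of that kind of transform follows; the class below is illustrative (not the actual mock plugin) and assumes the cdap-etl Transform/Emitter API, in which emitAlert(Map) raises an alert whose map becomes Alert.getPayload().

// Illustrative sketch, not the actual NullAlertTransform: emit an alert when the
// configured field is null, otherwise pass the record through unchanged.
import java.util.HashMap;
import java.util.Map;

import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.Transform;

public class NullToAlertTransform extends Transform<StructuredRecord, StructuredRecord> {
    private final String fieldName; // e.g. "id", matching NullAlertTransform.getPlugin("id") above

    public NullToAlertTransform(String fieldName) {
        this.fieldName = fieldName;
    }

    @Override
    public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
        if (input.get(fieldName) == null) {
            // The payload becomes Alert.getPayload(); the tests above expect an empty map.
            Map<String, String> payload = new HashMap<>();
            emitter.emitAlert(payload);
            return;
        }
        emitter.emit(input);
    }
}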

Example 3 with Alert

Use of io.cdap.cdap.etl.api.Alert in project cdap by caskdata.

From the class SmartWorkflow, method destroy:

@Override
public void destroy() {
    WorkflowContext workflowContext = getContext();
    PipelineRuntime pipelineRuntime = new PipelineRuntime(workflowContext, workflowMetrics);
    // Execute the post actions only if pipeline is not running in preview mode.
    if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
        for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
            String name = endingActionEntry.getKey();
            PostAction action = endingActionEntry.getValue();
            StageSpec stageSpec = stageSpecs.get(name);
            BatchActionContext context = new WorkflowBackedActionContext(workflowContext, pipelineRuntime, stageSpec);
            try {
                action.run(context);
            } catch (Throwable t) {
                LOG.error("Error while running post action {}.", name, t);
            }
        }
    }
    Map<String, String> connectorDatasets = GSON.fromJson(workflowContext.getWorkflowSpecification().getProperty(Constants.CONNECTOR_DATASETS), STAGE_DATASET_MAP);
    // publish all alerts
    for (Map.Entry<String, AlertPublisher> alertPublisherEntry : alertPublishers.entrySet()) {
        String stageName = alertPublisherEntry.getKey();
        AlertPublisher alertPublisher = alertPublisherEntry.getValue();
        FileSet alertConnector = workflowContext.getDataset(connectorDatasets.get(stageName));
        try (CloseableIterator<Alert> alerts = new AlertReader(alertConnector)) {
            if (!alerts.hasNext()) {
                continue;
            }
            StageMetrics stageMetrics = new DefaultStageMetrics(workflowMetrics, stageName);
            StageSpec stageSpec = stageSpecs.get(stageName);
            AlertPublisherContext alertContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, workflowContext, workflowContext.getAdmin());
            alertPublisher.initialize(alertContext);
            TrackedIterator<Alert> trackedIterator = new TrackedIterator<>(alerts, stageMetrics, Constants.Metrics.RECORDS_IN);
            alertPublisher.publish(trackedIterator);
        } catch (Exception e) {
            LOG.warn("Stage {} had errors publishing alerts. Alerts may not have been published.", stageName, e);
        } finally {
            try {
                alertPublisher.destroy();
            } catch (Exception e) {
                LOG.warn("Error destroying alert publisher for stage {}", stageName, e);
            }
        }
    }
    ProgramStatus status = getContext().getState().getStatus();
    if (status == ProgramStatus.FAILED) {
        WRAPPERLOGGER.error("Pipeline '{}' failed.", getContext().getApplicationSpecification().getName());
    } else {
        WRAPPERLOGGER.info("Pipeline '{}' {}.", getContext().getApplicationSpecification().getName(), status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase());
    }
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), workflowContext.getLogicalStartTime(), workflowContext, workflowContext, workflowContext.getNamespace());
    // Get resolved plugin properties
    Map<String, Map<String, String>> resolvedProperties = new HashMap<>();
    for (StageSpec spec : stageSpecs.values()) {
        String stageName = spec.getName();
        resolvedProperties.put(stageName, workflowContext.getPluginProperties(stageName, macroEvaluator).getProperties());
    }
    // Add resolved plugin properties to workflow token as a JSON String
    workflowContext.getToken().put(RESOLVED_PLUGIN_PROPERTIES_MAP, GSON.toJson(resolvedProperties));
    // record only if the Workflow is successful
    if (status != ProgramStatus.COMPLETED) {
        return;
    }
    // Collect field operations from each phase
    WorkflowToken token = workflowContext.getToken();
    List<NodeValue> allNodeValues = token.getAll(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN);
    if (allNodeValues.isEmpty()) {
        // no field lineage recorded by any stage
        return;
    }
    Map<String, List<FieldOperation>> allStageOperations = new HashMap<>();
    for (StageSpec stageSpec : stageSpecs.values()) {
        allStageOperations.put(stageSpec.getName(), new ArrayList<>());
    }
    for (NodeValue nodeValue : allNodeValues) {
        Map<String, List<FieldOperation>> stageOperations = GSON.fromJson(nodeValue.getValue().toString(), STAGE_OPERATIONS_MAP);
        for (Map.Entry<String, List<FieldOperation>> entry : stageOperations.entrySet()) {
            // ignore operations recorded for stages that are not in the stage specs
            if (allStageOperations.containsKey(entry.getKey())) {
                allStageOperations.get(entry.getKey()).addAll(entry.getValue());
            }
        }
    }
    FieldLineageProcessor processor = new FieldLineageProcessor(spec);
    Set<Operation> processedOperations = processor.validateAndConvert(allStageOperations);
    if (!processedOperations.isEmpty()) {
        workflowContext.record(processedOperations);
    }
}
Also used : NodeValue(io.cdap.cdap.api.workflow.NodeValue) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) BatchActionContext(io.cdap.cdap.etl.api.batch.BatchActionContext) WorkflowBackedActionContext(io.cdap.cdap.etl.batch.WorkflowBackedActionContext) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) AlertReader(io.cdap.cdap.etl.batch.connector.AlertReader) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) StageMetrics(io.cdap.cdap.etl.api.StageMetrics) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) AlertPublisherContext(io.cdap.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(io.cdap.cdap.etl.common.DefaultAlertPublisherContext) FieldLineageProcessor(io.cdap.cdap.etl.lineage.FieldLineageProcessor) AlertPublisher(io.cdap.cdap.etl.api.AlertPublisher) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TrackedIterator(io.cdap.cdap.etl.common.TrackedIterator) WorkflowContext(io.cdap.cdap.api.workflow.WorkflowContext) DisjointConnectionsException(io.cdap.cdap.etl.planner.DisjointConnectionsException) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) Alert(io.cdap.cdap.etl.api.Alert) PostAction(io.cdap.cdap.etl.api.batch.PostAction) Map(java.util.Map) ProgramStatus(io.cdap.cdap.api.ProgramStatus)
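
Everything on this page treats Alert as a small value object: it is constructed from a stage name and a String-to-String payload map, round-trips through Gson, and supports value equality so the tests can compare sets of alerts. A simplified sketch of such a class is below; the real io.cdap.cdap.etl.api.Alert may differ in detail.

// Simplified sketch of an alert value object consistent with its use on this page:
// new Alert(stageName, payload), Gson round-tripping, and value equality in set
// comparisons. Not the actual cdap class.
import java.io.Serializable;
import java.util.Map;
import java.util.Objects;

public class SimpleAlert implements Serializable {
    private final String stageName;
    private final Map<String, String> payload;

    public SimpleAlert(String stageName, Map<String, String> payload) {
        this.stageName = stageName;
        this.payload = payload;
    }

    public String getStageName() {
        return stageName;
    }

    public Map<String, String> getPayload() {
        return payload;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        SimpleAlert that = (SimpleAlert) o;
        return Objects.equals(stageName, that.stageName) && Objects.equals(payload, that.payload);
    }

    @Override
    public int hashCode() {
        return Objects.hash(stageName, payload);
    }
}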

Example 4 with Alert

Use of io.cdap.cdap.etl.api.Alert in project cdap by caskdata.

From the class SparkPipelineRunner, method addEmitted:

private EmittedRecords.Builder addEmitted(EmittedRecords.Builder builder, PipelinePhase pipelinePhase, StageSpec stageSpec, SparkCollection<RecordInfo<Object>> stageData, Dag dag, Set<String> branchers, Set<String> shufflers, boolean hasErrors, boolean hasAlerts) {
    builder.setRawData(stageData);
    if (shouldCache(dag, stageSpec.getName(), branchers, shufflers)) {
        stageData = stageData.cache();
    }
    if (hasErrors) {
        // Split error records off the mixed output with a flatMap-based pass filter.
        SparkCollection<ErrorRecord<Object>> errors = stageData.flatMap(stageSpec, new ErrorPassFilter<Object>());
        builder.setErrors(errors);
    }
    if (hasAlerts) {
        // Likewise split alerts off; they are published later by the alert publisher stage.
        SparkCollection<Alert> alerts = stageData.flatMap(stageSpec, new AlertPassFilter());
        builder.setAlerts(alerts);
    }
    if (SplitterTransform.PLUGIN_TYPE.equals(stageSpec.getPluginType())) {
        // set collections for each port, implemented as a filter on the port.
        for (StageSpec.Port portSpec : stageSpec.getOutputPorts().values()) {
            String port = portSpec.getPort();
            SparkCollection<Object> portData = filterPortRecords(stageSpec, stageData, port);
            builder.addPort(port, portData);
        }
    } else {
        SparkCollection<Object> outputs = filterPortRecords(stageSpec, stageData, null);
        builder.setOutput(outputs);
    }
    return builder;
}
Also used : StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) Alert(io.cdap.cdap.etl.api.Alert) AlertPassFilter(io.cdap.cdap.etl.spark.function.AlertPassFilter) ErrorRecord(io.cdap.cdap.etl.api.ErrorRecord)
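
ErrorPassFilter and AlertPassFilter split a single mixed collection of RecordInfo entries into per-type collections via flatMap: each function emits a record's unwrapped value when it matches the wanted type and nothing otherwise. The sketch below shows that idea in plain Java; RecordInfoSketch, RecordType, getType(), and getValue() are hypothetical stand-ins, not the verified cdap API.

// Conceptual sketch of a "pass filter": keep records of one type and unwrap their
// value. RecordInfoSketch, RecordType, getType(), and getValue() are hypothetical
// stand-ins for the real RecordInfo API.
import java.util.Collections;
import java.util.List;

enum RecordType { OUTPUT, ERROR, ALERT }

class RecordInfoSketch<T> {
    private final T value;
    private final RecordType type;

    RecordInfoSketch(T value, RecordType type) {
        this.value = value;
        this.type = type;
    }

    T getValue() {
        return value;
    }

    RecordType getType() {
        return type;
    }
}

class PassFilterSketch {
    // flatMap-style: zero or one outputs per input, so non-matching records vanish.
    static <T> List<T> apply(RecordInfoSketch<T> record, RecordType wanted) {
        return record.getType() == wanted
            ? Collections.singletonList(record.getValue())
            : Collections.emptyList();
    }
}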

Example 5 with Alert

Use of io.cdap.cdap.etl.api.Alert in project cdap by caskdata.

From the class DataStreamsTest, method testAlertPublisher:

@Test
public void testAlertPublisher() throws Exception {
    String sinkName = "alertSink";
    String topic = "alertTopic";
    Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
    StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
    /*
     * source --> nullAlert --> sink
     *               |
     *               |--> TMS publisher
     */
    DataStreamsConfig config = DataStreamsConfig.builder()
        .setBatchInterval("5s")
        .addStage(new ETLStage("source", MockSource.getPlugin(schema, ImmutableList.of(record1, record2, alertRecord))))
        .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addStage(new ETLStage("tms", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
        .addConnection("source", "nullAlert")
        .addConnection("nullAlert", "sink")
        .addConnection("nullAlert", "tms")
        .setCheckpointDir(checkpointDir)
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    final Set<StructuredRecord> expectedRecords = ImmutableSet.of(record1, record2);
    final Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<String, String>()));
    final DataSetManager<Table> sinkTable = getDataset(sinkName);
    Tasks.waitFor(true, () -> {
        // get alerts from TMS
        try {
            getMessagingAdmin(NamespaceId.DEFAULT.getNamespace()).getTopicProperties(topic);
        } catch (TopicNotFoundException e) {
            return false;
        }
        MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
        Set<Alert> actualMessages = new HashSet<>();
        try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
            while (iter.hasNext()) {
                Message message = iter.next();
                Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
                actualMessages.add(alert);
            }
        }
        // get records from sink
        sinkTable.flush();
        Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(sinkTable));
        return expectedRecords.equals(outputRecords) && expectedMessages.equals(actualMessages);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(10, TimeUnit.SECONDS);
    validateMetric(appId, "source.records.out", 3);
    validateMetric(appId, "nullAlert.records.in", 3);
    validateMetric(appId, "nullAlert.records.out", 2);
    validateMetric(appId, "nullAlert.records.alert", 1);
    validateMetric(appId, "sink.records.in", 2);
    validateMetric(appId, "tms.records.in", 1);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Message(io.cdap.cdap.api.messaging.Message) HashMap(java.util.HashMap) TopicNotFoundException(io.cdap.cdap.api.messaging.TopicNotFoundException) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Alert(io.cdap.cdap.etl.api.Alert) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)
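
Both tests fetch and decode alerts from TMS with the same loop. That step distills into a small helper, sketched below from the code above; the class and method names are assumptions, while the MessageFetcher.fetch and Message.decodePayload calls mirror the tests.

// Helper distilled from the test loops above: fetch up to `limit` messages from a
// TMS topic and decode each payload into an Alert with Gson.
import java.util.HashSet;
import java.util.Set;

import com.google.gson.Gson;
import io.cdap.cdap.api.dataset.lib.CloseableIterator;
import io.cdap.cdap.api.messaging.Message;
import io.cdap.cdap.api.messaging.MessageFetcher;
import io.cdap.cdap.etl.api.Alert;

public final class AlertFetch {
    private static final Gson GSON = new Gson();

    static Set<Alert> fetchAlerts(MessageFetcher fetcher, String namespace, String topic,
                                  int limit) throws Exception {
        Set<Alert> alerts = new HashSet<>();
        try (CloseableIterator<Message> iter = fetcher.fetch(namespace, topic, limit, 0)) {
            while (iter.hasNext()) {
                Message message = iter.next();
                alerts.add(message.decodePayload(r -> GSON.fromJson(r, Alert.class)));
            }
        }
        return alerts;
    }
}

With this, each test's inner loop reduces to a call like fetchAlerts(getMessagingContext().getMessageFetcher(), NamespaceId.DEFAULT.getNamespace(), topic, 5).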

Aggregations

Alert (io.cdap.cdap.etl.api.Alert) 8
AlertPublisher (io.cdap.cdap.etl.api.AlertPublisher) 3
AlertPublisherContext (io.cdap.cdap.etl.api.AlertPublisherContext) 3
StageMetrics (io.cdap.cdap.etl.api.StageMetrics) 3
DefaultAlertPublisherContext (io.cdap.cdap.etl.common.DefaultAlertPublisherContext) 3
DefaultStageMetrics (io.cdap.cdap.etl.common.DefaultStageMetrics) 3
PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime) 3
TrackedIterator (io.cdap.cdap.etl.common.TrackedIterator) 3
HashMap (java.util.HashMap) 3
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord) 2
Schema (io.cdap.cdap.api.data.schema.Schema) 2
Table (io.cdap.cdap.api.dataset.table.Table) 2
MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator) 2
Message (io.cdap.cdap.api.messaging.Message) 2
MessageFetcher (io.cdap.cdap.api.messaging.MessageFetcher) 2
DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator) 2
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage) 2
StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec) 2
AppRequest (io.cdap.cdap.proto.artifact.AppRequest) 2
ApplicationId (io.cdap.cdap.proto.id.ApplicationId) 2