Use of io.cdap.cdap.etl.api.StageMetrics in project cdap by caskdata.
In the class BaseRDDCollection, the method publishAlerts:
@Override
public void publishAlerts(StageSpec stageSpec, StageStatisticsCollector collector) throws Exception {
  PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
  AlertPublisher alertPublisher = pluginFunctionContext.createPlugin();
  PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
  AlertPublisherContext alertPublisherContext =
    new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
  alertPublisher.initialize(alertPublisherContext);
  StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageSpec.getName());
  TrackedIterator<Alert> trackedAlerts =
    new TrackedIterator<>(((JavaRDD<Alert>) rdd).collect().iterator(), stageMetrics, Constants.Metrics.RECORDS_IN);
  alertPublisher.publish(trackedAlerts);
  alertPublisher.destroy();
}
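TrackedIterator is what ties the alert stream to StageMetrics here: the iterator is constructed with the stage's metrics and Constants.Metrics.RECORDS_IN, so each alert consumed by the publisher is presumably counted against that metric. Below is a minimal sketch of the wrapping pattern, assuming only StageMetrics.count(String, int); the class name CountingIterator and its internals are illustrative, not the actual TrackedIterator implementation.

import java.util.Iterator;

import io.cdap.cdap.etl.api.StageMetrics;

// Illustrative only: counts each element against a stage metric as it is consumed.
// The real TrackedIterator in cdap-etl may differ in its details.
public class CountingIterator<T> implements Iterator<T> {
  private final Iterator<T> delegate;
  private final StageMetrics stageMetrics;
  private final String metricName;

  public CountingIterator(Iterator<T> delegate, StageMetrics stageMetrics, String metricName) {
    this.delegate = delegate;
    this.stageMetrics = stageMetrics;
    this.metricName = metricName;
  }

  @Override
  public boolean hasNext() {
    return delegate.hasNext();
  }

  @Override
  public T next() {
    T element = delegate.next();
    // emit one count for the stage each time an element is handed out
    stageMetrics.count(metricName, 1);
    return element;
  }
}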
Use of io.cdap.cdap.etl.api.StageMetrics in project cdap by caskdata.
In the class TransformExecutorFactory, the method getTransformation:
@SuppressWarnings("unchecked")
protected <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
  String stageName = stageSpec.getName();
  String pluginType = stageSpec.getPluginType();
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
  StageStatisticsCollector collector = collectStageStatistics
    ? getStatisticsCollector(stageName) : NoopStageStatisticsCollector.INSTANCE;
  Transformation transformation = getInitializedTransformation(stageSpec);
  // we emit metrics for records into alert publishers when the actual alerts are published,
  // not when we write the alerts to the temporary dataset
  String recordsInMetric = AlertPublisher.PLUGIN_TYPE.equals(pluginType) ? null : Constants.Metrics.RECORDS_IN;
  return new TrackedTransform<>(transformation, stageMetrics, recordsInMetric,
                                Constants.Metrics.RECORDS_OUT, getDataTracer(stageName), collector);
}
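The returned TrackedTransform pairs the stage's Transformation with its StageMetrics so record counts are emitted as data flows through, with records.in skipped for alert publishers as the comment above explains. The sketch below shows the counting idea in isolation, assuming the Transformation.transform(IN, Emitter<OUT>) signature and StageMetrics.count(String, int); the real TrackedTransform also handles data tracing and stage statistics, which are omitted here.

import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.StageMetrics;
import io.cdap.cdap.etl.api.Transformation;

// Illustrative sketch: count incoming records around a delegate transformation.
// Not the actual TrackedTransform implementation.
public class CountingTransform<IN, OUT> implements Transformation<IN, OUT> {
  private final Transformation<IN, OUT> delegate;
  private final StageMetrics stageMetrics;
  private final String recordsInMetric; // may be null, e.g. for alert publishers

  public CountingTransform(Transformation<IN, OUT> delegate, StageMetrics stageMetrics,
                           String recordsInMetric) {
    this.delegate = delegate;
    this.stageMetrics = stageMetrics;
    this.recordsInMetric = recordsInMetric;
  }

  @Override
  public void transform(IN input, Emitter<OUT> emitter) throws Exception {
    if (recordsInMetric != null) {
      stageMetrics.count(recordsInMetric, 1);
    }
    // a fuller implementation would also wrap the emitter to count records.out per emitted record;
    // here we simply delegate
    delegate.transform(input, emitter);
  }
}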
Use of io.cdap.cdap.etl.api.StageMetrics in project cdap by caskdata.
In the class SmartWorkflow, the method destroy:
@Override
public void destroy() {
  WorkflowContext workflowContext = getContext();
  PipelineRuntime pipelineRuntime = new PipelineRuntime(workflowContext, workflowMetrics);
  // Execute the post actions only if the pipeline is not running in preview mode.
  if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
    for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
      String name = endingActionEntry.getKey();
      PostAction action = endingActionEntry.getValue();
      StageSpec stageSpec = stageSpecs.get(name);
      BatchActionContext context = new WorkflowBackedActionContext(workflowContext, pipelineRuntime, stageSpec);
      try {
        action.run(context);
      } catch (Throwable t) {
        LOG.error("Error while running post action {}.", name, t);
      }
    }
  }
  Map<String, String> connectorDatasets =
    GSON.fromJson(workflowContext.getWorkflowSpecification().getProperty(Constants.CONNECTOR_DATASETS),
                  STAGE_DATASET_MAP);
  // publish all alerts
  for (Map.Entry<String, AlertPublisher> alertPublisherEntry : alertPublishers.entrySet()) {
    String stageName = alertPublisherEntry.getKey();
    AlertPublisher alertPublisher = alertPublisherEntry.getValue();
    FileSet alertConnector = workflowContext.getDataset(connectorDatasets.get(stageName));
    try (CloseableIterator<Alert> alerts = new AlertReader(alertConnector)) {
      if (!alerts.hasNext()) {
        continue;
      }
      StageMetrics stageMetrics = new DefaultStageMetrics(workflowMetrics, stageName);
      StageSpec stageSpec = stageSpecs.get(stageName);
      AlertPublisherContext alertContext =
        new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, workflowContext, workflowContext.getAdmin());
      alertPublisher.initialize(alertContext);
      TrackedIterator<Alert> trackedIterator =
        new TrackedIterator<>(alerts, stageMetrics, Constants.Metrics.RECORDS_IN);
      alertPublisher.publish(trackedIterator);
    } catch (Exception e) {
      LOG.warn("Stage {} had errors publishing alerts. Alerts may not have been published.", stageName, e);
    } finally {
      try {
        alertPublisher.destroy();
      } catch (Exception e) {
        LOG.warn("Error destroying alert publisher for stage {}", stageName, e);
      }
    }
  }
  ProgramStatus status = getContext().getState().getStatus();
  if (status == ProgramStatus.FAILED) {
    WRAPPERLOGGER.error("Pipeline '{}' failed.", getContext().getApplicationSpecification().getName());
  } else {
    WRAPPERLOGGER.info("Pipeline '{}' {}.", getContext().getApplicationSpecification().getName(),
                       status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase());
  }
  MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(),
                                                            workflowContext.getLogicalStartTime(), workflowContext,
                                                            workflowContext, workflowContext.getNamespace());
  // Get resolved plugin properties
  Map<String, Map<String, String>> resolvedProperties = new HashMap<>();
  for (StageSpec spec : stageSpecs.values()) {
    String stageName = spec.getName();
    resolvedProperties.put(stageName, workflowContext.getPluginProperties(stageName, macroEvaluator).getProperties());
  }
  // Add resolved plugin properties to the workflow token as a JSON string
  workflowContext.getToken().put(RESOLVED_PLUGIN_PROPERTIES_MAP, GSON.toJson(resolvedProperties));
  // record field lineage only if the workflow completed successfully
  if (status != ProgramStatus.COMPLETED) {
    return;
  }
  // Collect field operations from each phase
  WorkflowToken token = workflowContext.getToken();
  List<NodeValue> allNodeValues = token.getAll(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN);
  if (allNodeValues.isEmpty()) {
    // no field lineage was recorded by any stage
    return;
  }
  Map<String, List<FieldOperation>> allStageOperations = new HashMap<>();
  for (StageSpec stageSpec : stageSpecs.values()) {
    allStageOperations.put(stageSpec.getName(), new ArrayList<>());
  }
  for (NodeValue nodeValue : allNodeValues) {
    Map<String, List<FieldOperation>> stageOperations =
      GSON.fromJson(nodeValue.getValue().toString(), STAGE_OPERATIONS_MAP);
    for (Map.Entry<String, List<FieldOperation>> entry : stageOperations.entrySet()) {
      // ignore operations reported by stages that are not part of the pipeline spec
      if (allStageOperations.containsKey(entry.getKey())) {
        allStageOperations.get(entry.getKey()).addAll(entry.getValue());
      }
    }
  }
  FieldLineageProcessor processor = new FieldLineageProcessor(spec);
  Set<Operation> processedOperations = processor.validateAndConvert(allStageOperations);
  if (!processedOperations.isEmpty()) {
    workflowContext.record(processedOperations);
  }
}
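The first block above runs every configured PostAction inside its own try/catch, so a failing post action is logged but does not prevent the remaining post actions or the alert publishing from running. A minimal sketch of such a post action is shown below; the io.cdap.cdap.etl.api.batch package locations and the isSuccessful() method on BatchActionContext are assumptions here rather than confirmed API details.

import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.cdap.etl.api.batch.BatchActionContext;
import io.cdap.cdap.etl.api.batch.PostAction;

// Illustrative post action. SmartWorkflow.destroy() catches and logs anything run() throws,
// so a failure here does not abort the rest of the cleanup.
public class LogOnSuccessAction extends PostAction {

  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    // nothing to validate in this sketch
  }

  @Override
  public void run(BatchActionContext context) throws Exception {
    // isSuccessful() is assumed to report whether the pipeline run succeeded
    if (context.isSuccessful()) {
      System.out.println("Pipeline run succeeded; running post action.");
    }
  }
}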
Use of io.cdap.cdap.etl.api.StageMetrics in project cdap by caskdata.
In the class MapReduceTransformExecutorFactory, the method getTransformation:
@SuppressWarnings("unchecked")
@Override
protected <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
  String stageName = stageSpec.getName();
  String pluginType = stageSpec.getPluginType();
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
  TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
  StageStatisticsCollector collector = collectStageStatistics
    ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext) : new NoopStageStatisticsCollector();
  if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
    Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchAggregator<?, ?, ?> batchAggregator;
    if (plugin instanceof BatchReducibleAggregator) {
      BatchReducibleAggregator<?, ?, ?, ?> reducibleAggregator = (BatchReducibleAggregator<?, ?, ?, ?>) plugin;
      batchAggregator = new AggregatorBridge<>(reducibleAggregator);
    } else {
      batchAggregator = (BatchAggregator<?, ?, ?>) plugin;
    }
    BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
    batchAggregator.initialize(runtimeContext);
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, getDataTracer(stageName), collector);
    } else {
      return getTrackedAggregateStep(
        new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, getDataTracer(stageName), collector);
    }
  } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
    Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchJoiner<?, ?, ?> batchJoiner;
    Set<String> filterNullKeyStages = new HashSet<>();
    if (plugin instanceof BatchAutoJoiner) {
      BatchAutoJoiner autoJoiner = (BatchAutoJoiner) plugin;
      FailureCollector failureCollector = new LoggingFailureCollector(stageName, stageSpec.getInputSchemas());
      DefaultAutoJoinerContext context = DefaultAutoJoinerContext.from(stageSpec.getInputSchemas(), failureCollector);
      // the definition will be non-null because PipelinePhasePreparer validates it at the start of the run
      JoinDefinition joinDefinition = autoJoiner.define(context);
      JoinCondition condition = joinDefinition.getCondition();
      // this should never happen since it is checked at deployment time, but check here to be safe
      if (condition.getOp() != JoinCondition.Op.KEY_EQUALITY) {
        failureCollector.addFailure(
          String.format("Join stage '%s' uses a %s condition, which is not supported with the MapReduce engine.",
                        stageName, condition.getOp()),
          "Switch to a different execution engine.");
      }
      failureCollector.getOrThrowException();
      batchJoiner = new JoinerBridge(stageName, autoJoiner, joinDefinition);
      // this is the same as filtering out records that have a null key if they are from an optional stage
      if (condition.getOp() == JoinCondition.Op.KEY_EQUALITY && !((JoinCondition.OnKeys) condition).isNullSafe()) {
        filterNullKeyStages = joinDefinition.getStages().stream()
          .filter(s -> !s.isRequired())
          .map(JoinStage::getStageName)
          .collect(Collectors.toSet());
      }
    } else {
      batchJoiner = (BatchJoiner<?, ?, ?>) plugin;
    }
    BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
    batchJoiner.initialize(runtimeContext);
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, filterNullKeyStages),
        stageMetrics, getDataTracer(stageName), collector);
    } else {
      return getTrackedMergeStep(
        new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName,
                                        runtimeContext.getInputSchemas().size()),
        stageMetrics, getDataTracer(stageName), collector);
    }
  }
  return super.getTransformation(stageSpec);
}
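The KEY_EQUALITY check above is a defensive engine-capability validation: the condition is already checked at deployment time, but the failure collector produces a clear error if an unsupported join condition slips through to the MapReduce engine. Pulled out on its own, the pattern looks like the sketch below; it reuses only calls that appear in the snippet (getCondition, getOp, addFailure, getOrThrowException), while the io.cdap.cdap.etl.api.join package names in the imports are assumptions.

import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.join.JoinCondition;
import io.cdap.cdap.etl.api.join.JoinDefinition;

// Illustrative helper: reject join conditions the engine cannot execute,
// then throw all collected failures at once.
public final class JoinConditionValidator {
  private JoinConditionValidator() { }

  static void validateKeyEquality(String stageName, JoinDefinition joinDefinition,
                                  FailureCollector failureCollector) {
    JoinCondition condition = joinDefinition.getCondition();
    if (condition.getOp() != JoinCondition.Op.KEY_EQUALITY) {
      failureCollector.addFailure(
        String.format("Join stage '%s' uses a %s condition, which is not supported with the MapReduce engine.",
                      stageName, condition.getOp()),
        "Switch to a different execution engine.");
    }
    // throws a ValidationException if any failure was added, otherwise returns normally
    failureCollector.getOrThrowException();
  }
}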
Use of io.cdap.cdap.etl.api.StageMetrics in project cdap by caskdata.
In the class PipelinePluginContext, the method wrapPlugin:
private Object wrapPlugin(String pluginId, Object plugin) {
  Caller caller = getCaller(pluginId);
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, pluginId);
  OperationTimer operationTimer = processTimingEnabled
    ? new MetricsOperationTimer(stageMetrics) : NoOpOperationTimer.INSTANCE;
  if (plugin instanceof Action) {
    return new WrappedAction((Action) plugin, caller);
  } else if (plugin instanceof BatchSource) {
    return new WrappedBatchSource<>((BatchSource) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchSink) {
    return new WrappedBatchSink<>((BatchSink) plugin, caller, operationTimer);
  } else if (plugin instanceof ErrorTransform) {
    return new WrappedErrorTransform<>((ErrorTransform) plugin, caller, operationTimer);
  } else if (plugin instanceof Transform) {
    return new WrappedTransform<>((Transform) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchReducibleAggregator) {
    return new WrappedReduceAggregator<>((BatchReducibleAggregator) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchAggregator) {
    return new WrappedBatchAggregator<>((BatchAggregator) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchJoiner) {
    return new WrappedBatchJoiner<>((BatchJoiner) plugin, caller, operationTimer);
  } else if (plugin instanceof PostAction) {
    return new WrappedPostAction((PostAction) plugin, caller);
  } else if (plugin instanceof SplitterTransform) {
    return new WrappedSplitterTransform<>((SplitterTransform) plugin, caller, operationTimer);
  }
  return wrapUnknownPlugin(pluginId, plugin, caller);
}
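When processTimingEnabled is set, each wrapped plugin call is timed through a MetricsOperationTimer backed by the stage's StageMetrics. The sketch below shows one way such a timer could report through a gauge; it is a hypothetical stand-in, not the actual MetricsOperationTimer or the OperationTimer interface, and it assumes StageMetrics exposes gauge(String, long) like io.cdap.cdap.api.metrics.Metrics. The metric name process.time.ms is likewise an assumption for illustration.

import java.util.concurrent.TimeUnit;

import io.cdap.cdap.etl.api.StageMetrics;

// Hypothetical sketch of a metrics-backed operation timer.
public class SimpleStageTimer {
  private final StageMetrics stageMetrics;
  private long startNanos;

  public SimpleStageTimer(StageMetrics stageMetrics) {
    this.stageMetrics = stageMetrics;
  }

  public void start() {
    startNanos = System.nanoTime();
  }

  public void stop() {
    long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    // report the elapsed time of the wrapped plugin call as a stage-level gauge
    stageMetrics.gauge("process.time.ms", elapsedMillis);
  }
}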