use of io.cdap.cdap.etl.common.DefaultStageMetrics in project cdap by cdapio.
the class PipelinePluginContext method wrapPlugin.
private Object wrapPlugin(String pluginId, Object plugin) {
  Caller caller = getCaller(pluginId);
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, pluginId);
  OperationTimer operationTimer = processTimingEnabled
    ? new MetricsOperationTimer(stageMetrics)
    : NoOpOperationTimer.INSTANCE;
  if (plugin instanceof Action) {
    return new WrappedAction((Action) plugin, caller);
  } else if (plugin instanceof BatchSource) {
    return new WrappedBatchSource<>((BatchSource) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchSink) {
    return new WrappedBatchSink<>((BatchSink) plugin, caller, operationTimer);
  } else if (plugin instanceof ErrorTransform) {
    return new WrappedErrorTransform<>((ErrorTransform) plugin, caller, operationTimer);
  } else if (plugin instanceof Transform) {
    return new WrappedTransform<>((Transform) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchReducibleAggregator) {
    return new WrappedReduceAggregator<>((BatchReducibleAggregator) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchAggregator) {
    return new WrappedBatchAggregator<>((BatchAggregator) plugin, caller, operationTimer);
  } else if (plugin instanceof BatchJoiner) {
    return new WrappedBatchJoiner<>((BatchJoiner) plugin, caller, operationTimer);
  } else if (plugin instanceof PostAction) {
    return new WrappedPostAction((PostAction) plugin, caller);
  } else if (plugin instanceof SplitterTransform) {
    return new WrappedSplitterTransform<>((SplitterTransform) plugin, caller, operationTimer);
  }
  return wrapUnknownPlugin(pluginId, plugin, caller);
}
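The wrappers created above all follow the same decorator pattern: every call into the plugin is routed through the Caller and bracketed by the OperationTimer so that per-stage process timing feeds the DefaultStageMetrics built at the top of the method. A minimal, self-contained sketch of that pattern follows; the Caller and OperationTimer interfaces here are simplified stand-ins, not the actual CDAP signatures.

import java.util.concurrent.Callable;

// Simplified stand-ins for the CDAP types; the method sets are illustrative only.
interface Caller {
  <T> T call(Callable<T> callable) throws Exception;
}

interface OperationTimer {
  void start();
  void reset();
}

class WrappedOperation<T> {
  private final Callable<T> delegate;
  private final Caller caller;
  private final OperationTimer operationTimer;

  WrappedOperation(Callable<T> delegate, Caller caller, OperationTimer operationTimer) {
    this.delegate = delegate;
    this.caller = caller;
    this.operationTimer = operationTimer;
  }

  T execute() throws Exception {
    operationTimer.start();
    try {
      // Route the plugin call through the Caller so context handling stays uniform.
      return caller.call(delegate);
    } finally {
      // Flush the elapsed time to the stage's metrics.
      operationTimer.reset();
    }
  }
}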
use of io.cdap.cdap.etl.common.DefaultStageMetrics in project cdap by cdapio.
the class BatchSQLEngineAdapter method write.
/**
 * Try to write the output directly to the SQLEngineOutput registered by this engine.
 *
 * @param datasetName dataset to write
 * @param sqlEngineOutput output instance created by this engine
 * @return {@link SQLEngineJob<Boolean>} representing whether the write operation succeeded.
 */
public SQLEngineJob<Boolean> write(String datasetName, SQLEngineOutput sqlEngineOutput) {
  String outputStageName = sqlEngineOutput.getStageName();
  SQLEngineWriteJobKey writeJobKey =
    new SQLEngineWriteJobKey(datasetName, outputStageName, SQLEngineJobType.WRITE);
  // Run write job
  return runJob(writeJobKey, () -> {
    getDatasetForStage(datasetName);
    LOG.debug("Attempting write for dataset {} into {}", datasetName, sqlEngineOutput);
    SQLWriteResult writeResult = sqlEngine.write(new SQLWriteRequest(datasetName, sqlEngineOutput));
    LOG.debug("Write dataset {} into {} was {}", datasetName, sqlEngineOutput,
              writeResult.isSuccessful() ? "completed" : "refused");
    // If the result was successful, add stage metrics.
    if (writeResult.isSuccessful()) {
      DefaultStageMetrics stageMetrics = new DefaultStageMetrics(metrics, outputStageName);
      StageStatisticsCollector statisticsCollector = statsCollectors.get(outputStageName);
      countRecordsIn(writeResult.getNumRecords(), statisticsCollector, stageMetrics);
      countRecordsOut(writeResult.getNumRecords(), statisticsCollector, stageMetrics);
    }
    return writeResult.isSuccessful();
  });
}
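On the calling side, the Boolean job result lets the pipeline distinguish a completed direct write from a refused one and fall back to the regular sink path. A hedged sketch of that flow is below; waitForResult() and writeWithSink() are hypothetical placeholders for illustration, not the real SQLEngineJob or adapter API.

// Hypothetical caller-side flow; the names below are placeholders.
SQLEngineJob<Boolean> writeJob = adapter.write(datasetName, sqlEngineOutput);
boolean handledByEngine = writeJob.waitForResult(); // placeholder blocking accessor
if (!handledByEngine) {
  // The engine refused the direct write; fall back to the standard sink path.
  writeWithSink(datasetName, sqlEngineOutput);      // placeholder fallback
}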
use of io.cdap.cdap.etl.common.DefaultStageMetrics in project cdap by cdapio.
the class BaseRDDCollection method publishAlerts.
@Override
public void publishAlerts(StageSpec stageSpec, StageStatisticsCollector collector) throws Exception {
  PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
  AlertPublisher alertPublisher = pluginFunctionContext.createPlugin();
  PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
  AlertPublisherContext alertPublisherContext =
    new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
  alertPublisher.initialize(alertPublisherContext);
  StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageSpec.getName());
  TrackedIterator<Alert> trackedAlerts =
    new TrackedIterator<>(((JavaRDD<Alert>) rdd).collect().iterator(), stageMetrics,
                          Constants.Metrics.RECORDS_IN);
  alertPublisher.publish(trackedAlerts);
  alertPublisher.destroy();
}
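TrackedIterator is what ties DefaultStageMetrics to the alert stream here: each alert the publisher pulls increments the stage's records.in counter. A minimal stand-in showing the idea follows; the Counter interface is a simplified substitute for StageMetrics, not the CDAP type.

import java.util.Iterator;

// Simplified substitute for StageMetrics; only the counting method is sketched.
interface Counter {
  void count(String metricName, int delta);
}

// Counts every element that passes through, as TrackedIterator does.
class CountingIterator<T> implements Iterator<T> {
  private final Iterator<T> delegate;
  private final Counter counter;
  private final String metricName;

  CountingIterator(Iterator<T> delegate, Counter counter, String metricName) {
    this.delegate = delegate;
    this.counter = counter;
    this.metricName = metricName;
  }

  @Override
  public boolean hasNext() {
    return delegate.hasNext();
  }

  @Override
  public T next() {
    T element = delegate.next();
    counter.count(metricName, 1); // one increment per consumed record
    return element;
  }
}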
use of io.cdap.cdap.etl.common.DefaultStageMetrics in project cdap by cdapio.
the class BatchSQLEngineAdapter method tryRelationalTransform.
/**
 * This method is called when an engine is present and willing to try performing a relational transform.
 *
 * @param stageSpec stage specification
 * @param transform transform plugin
 * @param input input collections
 * @return the resulting collection, or an empty Optional if the transform can't be done with this engine
 */
public Optional<SQLEngineJob<SQLDataset>> tryRelationalTransform(StageSpec stageSpec,
                                                                 RelationalTransform transform,
                                                                 Map<String, SparkCollection<Object>> input) {
  String stageName = stageSpec.getName();
  Map<String, Relation> inputRelations = input.entrySet().stream()
    .collect(Collectors.toMap(
      Map.Entry::getKey,
      e -> sqlEngine.getRelation(
        new SQLRelationDefinition(e.getKey(), stageSpec.getInputSchemas().get(e.getKey())))));
  BasicRelationalTransformContext pluginContext =
    new BasicRelationalTransformContext(getSQLRelationalEngine(), inputRelations,
                                        stageSpec.getInputSchemas(), stageSpec.getOutputSchema());
  if (!transform.transform(pluginContext)) {
    // Plugin was not able to do a relational transform with this engine
    return Optional.empty();
  }
  if (pluginContext.getOutputRelation() == null) {
    // Plugin reported that the transformation succeeded but failed to set an output
    throw new IllegalStateException("Plugin " + transform + " did not produce a relational output");
  }
  if (!pluginContext.getOutputRelation().isValid()) {
    // The output is set to an invalid relation, probably because some of the transforms
    // are not supported by the engine
    return Optional.empty();
  }
  // Ensure input and output schemas for this stage are supported by the engine
  if (stageSpec.getInputSchemas().values().stream().anyMatch(s -> !sqlEngine.supportsInputSchema(s))) {
    return Optional.empty();
  }
  if (!sqlEngine.supportsOutputSchema(stageSpec.getOutputSchema())) {
    return Optional.empty();
  }
  // Validate the transformation definition with the engine
  SQLTransformDefinition transformDefinition =
    new SQLTransformDefinition(stageName, pluginContext.getOutputRelation(), stageSpec.getOutputSchema(),
                               Collections.emptyMap(), Collections.emptyMap());
  if (!sqlEngine.canTransform(transformDefinition)) {
    return Optional.empty();
  }
  return Optional.of(runJob(stageSpec.getName(), SQLEngineJobType.EXECUTE, () -> {
    // Push all stages that need to be pushed to execute this aggregation
    input.forEach((name, collection) -> {
      if (!exists(name)) {
        push(name, stageSpec.getInputSchemas().get(name), collection);
      }
    });
    // Initialize metrics collector
    DefaultStageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    StageStatisticsCollector statisticsCollector = statsCollectors.get(stageName);
    // Collect input datasets and execute transformation
    Map<String, SQLDataset> inputDatasets = input.keySet().stream()
      .collect(Collectors.toMap(Function.identity(), this::getDatasetForStage));
    // Count input records
    for (SQLDataset inputDataset : inputDatasets.values()) {
      countRecordsIn(inputDataset, statisticsCollector, stageMetrics);
    }
    // Execute transform
    SQLTransformRequest sqlContext =
      new SQLTransformRequest(inputDatasets, stageSpec.getName(), pluginContext.getOutputRelation(),
                              stageSpec.getOutputSchema());
    SQLDataset transformed = sqlEngine.transform(sqlContext);
    // Count output records
    countRecordsOut(transformed, statisticsCollector, stageMetrics);
    return transformed;
  }));
}
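Both this method and write() above funnel record counts through countRecordsIn/countRecordsOut, which update the stage metrics and, when one is registered, the statistics collector. The helpers below are a plausible reconstruction of that shape under simplified stand-in interfaces, not the actual BatchSQLEngineAdapter code.

// Stand-in interfaces; the real StageMetrics and StageStatisticsCollector differ.
interface Counter {
  void count(String metricName, int delta);
}

interface StatsCollector {
  void incrementInputRecordCount(long count);
  void incrementOutputRecordCount(long count);
}

final class RecordCounting {
  static final String RECORDS_IN = "records.in";   // metric names as used by CDAP stages
  static final String RECORDS_OUT = "records.out";

  static void countRecordsIn(long numRecords, StatsCollector stats, Counter metrics) {
    metrics.count(RECORDS_IN, (int) Math.min(numRecords, Integer.MAX_VALUE));
    if (stats != null) { // collector may be absent when stage statistics are disabled
      stats.incrementInputRecordCount(numRecords);
    }
  }

  static void countRecordsOut(long numRecords, StatsCollector stats, Counter metrics) {
    metrics.count(RECORDS_OUT, (int) Math.min(numRecords, Integer.MAX_VALUE));
    if (stats != null) {
      stats.incrementOutputRecordCount(numRecords);
    }
  }

  private RecordCounting() { }
}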
use of io.cdap.cdap.etl.common.DefaultStageMetrics in project cdap by cdapio.
the class MapReduceTransformExecutorFactory method getTransformation.
@SuppressWarnings("unchecked")
@Override
protected <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
  String stageName = stageSpec.getName();
  String pluginType = stageSpec.getPluginType();
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
  TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
  StageStatisticsCollector collector = collectStageStatistics
    ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext)
    : new NoopStageStatisticsCollector();
  if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
    Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchAggregator<?, ?, ?> batchAggregator;
    if (plugin instanceof BatchReducibleAggregator) {
      BatchReducibleAggregator<?, ?, ?, ?> reducibleAggregator = (BatchReducibleAggregator<?, ?, ?, ?>) plugin;
      batchAggregator = new AggregatorBridge<>(reducibleAggregator);
    } else {
      batchAggregator = (BatchAggregator<?, ?, ?>) plugin;
    }
    BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
    batchAggregator.initialize(runtimeContext);
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, getDataTracer(stageName), collector);
    } else {
      return getTrackedAggregateStep(
        new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, getDataTracer(stageName), collector);
    }
  } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
    Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchJoiner<?, ?, ?> batchJoiner;
    Set<String> filterNullKeyStages = new HashSet<>();
    if (plugin instanceof BatchAutoJoiner) {
      BatchAutoJoiner autoJoiner = (BatchAutoJoiner) plugin;
      FailureCollector failureCollector = new LoggingFailureCollector(stageName, stageSpec.getInputSchemas());
      DefaultAutoJoinerContext context = DefaultAutoJoinerContext.from(stageSpec.getInputSchemas(),
                                                                       failureCollector);
      // the definition will be non-null due to validation by PipelinePhasePreparer at the start of the run
      JoinDefinition joinDefinition = autoJoiner.define(context);
      JoinCondition condition = joinDefinition.getCondition();
      // should never happen as it's checked at deployment time, but add this to be safe
      if (condition.getOp() != JoinCondition.Op.KEY_EQUALITY) {
        failureCollector.addFailure(
          String.format("Join stage '%s' uses a %s condition, which is not supported with the MapReduce engine.",
                        stageName, condition.getOp()),
          "Switch to a different execution engine.");
      }
      failureCollector.getOrThrowException();
      batchJoiner = new JoinerBridge(stageName, autoJoiner, joinDefinition);
      // this is the same as filtering out records that have a null key if they are from an optional stage
      if (condition.getOp() == JoinCondition.Op.KEY_EQUALITY && !((JoinCondition.OnKeys) condition).isNullSafe()) {
        filterNullKeyStages = joinDefinition.getStages().stream()
          .filter(s -> !s.isRequired())
          .map(JoinStage::getStageName)
          .collect(Collectors.toSet());
      }
    } else {
      batchJoiner = (BatchJoiner<?, ?, ?>) plugin;
    }
    BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
    batchJoiner.initialize(runtimeContext);
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName,
                                       filterNullKeyStages),
        stageMetrics, getDataTracer(stageName), collector);
    } else {
      return getTrackedMergeStep(
        new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName,
                                        runtimeContext.getInputSchemas().size()),
        stageMetrics, getDataTracer(stageName), collector);
    }
  }
  return super.getTransformation(stageSpec);
}
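The getTracked* helpers wrap each transformation so that records are counted as they enter and leave the stage, using the DefaultStageMetrics instance built at the top of the method. A compact stand-in of that idea follows; Emitter, Transformation, and Counter here are simplified sketches, not the CDAP ETL interfaces.

// Simplified stand-ins for the CDAP ETL interfaces.
interface Emitter<T> {
  void emit(T value);
}

interface Transformation<IN, OUT> {
  void transform(IN input, Emitter<OUT> emitter) throws Exception;
}

interface Counter {
  void count(String metricName, int delta);
}

// Counts a record in, delegates, and counts each emitted record out,
// mirroring what a TrackedTransform-style wrapper does.
class TrackedStep<IN, OUT> implements Transformation<IN, OUT> {
  private final Transformation<IN, OUT> delegate;
  private final Counter counter;

  TrackedStep(Transformation<IN, OUT> delegate, Counter counter) {
    this.delegate = delegate;
    this.counter = counter;
  }

  @Override
  public void transform(IN input, Emitter<OUT> emitter) throws Exception {
    counter.count("records.in", 1);
    delegate.transform(input, output -> {
      counter.count("records.out", 1);
      emitter.emit(output);
    });
  }
}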