Use of co.cask.cdap.etl.common.DefaultMacroEvaluator in project cdap by caskdata.
The class ETLWorkflow, method initialize:
@Override
public void initialize(WorkflowContext context) throws Exception {
  super.initialize(context);
  postActions = new LinkedHashMap<>();
  BatchPipelineSpec batchPipelineSpec =
    GSON.fromJson(context.getWorkflowSpecification().getProperty("pipeline.spec"), BatchPipelineSpec.class);
  MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(context.getToken(), context.getRuntimeArguments(),
                                                            context.getLogicalStartTime(), context,
                                                            context.getNamespace());
  // Instantiate each post-action plugin with its macros already substituted.
  for (ActionSpec actionSpec : batchPipelineSpec.getEndingActions()) {
    postActions.put(actionSpec.getName(),
                    (PostAction) context.newPluginInstance(actionSpec.getName(), macroEvaluator));
  }
}
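The evaluator built above implements CDAP's MacroEvaluator interface, so macro-enabled plugin properties are resolved before the PostAction instances are created. A minimal sketch of using the same evaluator directly; the property name and date format are hypothetical, and support for the logicalStartTime macro function is an assumption:
  // Hedged sketch: direct use of the evaluator constructed above.
  // "report.dir" is a hypothetical runtime argument; "logicalStartTime" with a
  // date format is assumed to be a supported macro function.
  String reportDir = macroEvaluator.lookup("report.dir");
  String runDate = macroEvaluator.evaluate("logicalStartTime", "yyyy-MM-dd");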
Use of co.cask.cdap.etl.common.DefaultMacroEvaluator in project cdap by caskdata.
The class PipelineAction, method run:
@Override
public void run() throws Exception {
  CustomActionContext context = getContext();
  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  PipelinePhase phase = phaseSpec.getPhase();
  StageInfo stageInfo = phase.iterator().next();
  PluginContext pluginContext = new PipelinePluginContext(context, metrics,
                                                          phaseSpec.isStageLoggingEnabled(),
                                                          phaseSpec.isProcessTimingEnabled());
  // Instantiate the action plugin with macros resolved by DefaultMacroEvaluator.
  Action action = pluginContext.newPluginInstance(stageInfo.getName(),
    new DefaultMacroEvaluator(context.getWorkflowToken(), context.getRuntimeArguments(),
                              context.getLogicalStartTime(), context, context.getNamespace()));
  BasicArguments arguments = new BasicArguments(context);
  ActionContext actionContext = new BasicActionContext(context, metrics, stageInfo.getName(), arguments);
  // Only run the action when the stage's data tracer is disabled (i.e. not during preview).
  if (!context.getDataTracer(stageInfo.getName()).isEnabled()) {
    action.run(actionContext);
  }
  WorkflowToken token = context.getWorkflowToken();
  if (token == null) {
    throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
  }
  // Propagate any arguments the action added into the workflow token.
  for (Map.Entry<String, String> entry : arguments.getAddedArguments().entrySet()) {
    token.put(entry.getKey(), entry.getValue());
  }
}
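Macro substitution only applies to plugin config fields declared macro-enabled. A hypothetical action config illustrating this; the class and field names are made up, while PluginConfig, @Macro and @Description are the standard CDAP plugin APIs:
  // Hypothetical action plugin config. The value of "path" may contain macros such as
  // ${output.base}/${logicalStartTime(yyyy-MM-dd)}, which newPluginInstance(..., macroEvaluator)
  // substitutes before the plugin sees its config.
  public static class CleanupActionConfig extends PluginConfig {
    @Macro
    @Description("Directory to clean up; may contain macros")
    private String path;
  }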
Use of co.cask.cdap.etl.common.DefaultMacroEvaluator in project cdap by caskdata.
The class ETLSpark, method initialize:
@Override
public void initialize() throws Exception {
  SparkClientContext context = getContext();
  cleanupFiles = new ArrayList<>();
  CompositeFinisher.Builder finishers = CompositeFinisher.builder();

  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.driver.extraJavaOptions", "-XX:MaxPermSize=256m");
  sparkConf.set("spark.executor.extraJavaOptions", "-XX:MaxPermSize=256m");
  sparkConf.set("spark.speculation", "false");
  context.setSparkConf(sparkConf);

  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  // Pipeline-level engine properties are passed straight through to the Spark configuration.
  for (Map.Entry<String, String> pipelineProperty : phaseSpec.getPipelineProperties().entrySet()) {
    sparkConf.set(pipelineProperty.getKey(), pipelineProperty.getValue());
  }

  MacroEvaluator evaluator = new DefaultMacroEvaluator(context.getWorkflowToken(),
                                                       context.getRuntimeArguments(),
                                                       context.getLogicalStartTime(),
                                                       context, context.getNamespace());
  SparkBatchSourceFactory sourceFactory = new SparkBatchSourceFactory();
  SparkBatchSinkFactory sinkFactory = new SparkBatchSinkFactory();
  Map<String, Integer> stagePartitions = new HashMap<>();
  PluginContext pluginContext = new SparkPipelinePluginContext(context, context.getMetrics(),
                                                               phaseSpec.isStageLoggingEnabled(),
                                                               phaseSpec.isProcessTimingEnabled());
  // Instantiate each stage's plugin with macros evaluated, then call prepareRun on it.
  for (StageInfo stageInfo : phaseSpec.getPhase()) {
    String stageName = stageInfo.getName();
    String pluginType = stageInfo.getPluginType();
    if (BatchSource.PLUGIN_TYPE.equals(pluginType)) {
      BatchConfigurable<BatchSourceContext> batchSource = pluginContext.newPluginInstance(stageName, evaluator);
      BatchSourceContext sourceContext = new SparkBatchSourceContext(sourceFactory, context, stageInfo);
      batchSource.prepareRun(sourceContext);
      finishers.add(batchSource, sourceContext);
    } else if (BatchSink.PLUGIN_TYPE.equals(pluginType)) {
      BatchConfigurable<BatchSinkContext> batchSink = pluginContext.newPluginInstance(stageName, evaluator);
      BatchSinkContext sinkContext = new SparkBatchSinkContext(sinkFactory, context, null, stageInfo);
      batchSink.prepareRun(sinkContext);
      finishers.add(batchSink, sinkContext);
    } else if (SparkSink.PLUGIN_TYPE.equals(pluginType)) {
      BatchConfigurable<SparkPluginContext> sparkSink = pluginContext.newPluginInstance(stageName, evaluator);
      SparkPluginContext sparkPluginContext = new BasicSparkPluginContext(context, stageInfo);
      sparkSink.prepareRun(sparkPluginContext);
      finishers.add(sparkSink, sparkPluginContext);
    } else if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
      BatchAggregator aggregator = pluginContext.newPluginInstance(stageName, evaluator);
      DefaultAggregatorContext aggregatorContext = new DefaultAggregatorContext(context, stageInfo);
      aggregator.prepareRun(aggregatorContext);
      finishers.add(aggregator, aggregatorContext);
      stagePartitions.put(stageName, aggregatorContext.getNumPartitions());
    } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
      BatchJoiner joiner = pluginContext.newPluginInstance(stageName, evaluator);
      DefaultJoinerContext sparkJoinerContext = new DefaultJoinerContext(context, stageInfo);
      joiner.prepareRun(sparkJoinerContext);
      finishers.add(joiner, sparkJoinerContext);
      stagePartitions.put(stageName, sparkJoinerContext.getNumPartitions());
    }
  }

  // Serialize the source/sink factories and partition counts so the Spark program can read them back.
  File configFile = File.createTempFile("HydratorSpark", ".config");
  cleanupFiles.add(configFile);
  try (Writer writer = Files.newBufferedWriter(configFile.toPath(), StandardCharsets.UTF_8)) {
    SparkBatchSourceSinkFactoryInfo sourceSinkInfo =
      new SparkBatchSourceSinkFactoryInfo(sourceFactory, sinkFactory, stagePartitions);
    writer.write(GSON.toJson(sourceSinkInfo));
  }
  finisher = finishers.build();
  context.localize("HydratorSpark.config", configFile.toURI());
}
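The serialized SparkBatchSourceSinkFactoryInfo is localized under the name HydratorSpark.config so the Spark program can rebuild the source/sink factories and per-stage partition counts. A rough sketch of reading it back with the same GSON model; where exactly the actual Spark program does this is not shown here:
  // Hedged sketch: deserializing the localized config file.
  File localized = new File("HydratorSpark.config");  // assumed to be resolved from the localized resources
  try (Reader reader = Files.newBufferedReader(localized.toPath(), StandardCharsets.UTF_8)) {
    SparkBatchSourceSinkFactoryInfo info = GSON.fromJson(reader, SparkBatchSourceSinkFactoryInfo.class);
    // info carries the SparkBatchSourceFactory, SparkBatchSinkFactory and stage partition map
  }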
Use of co.cask.cdap.etl.common.DefaultMacroEvaluator in project cdap by caskdata.
The class SmartWorkflow, method initialize:
@Override
public void initialize(WorkflowContext context) throws Exception {
  super.initialize(context);
  String arguments = Joiner.on(", ").withKeyValueSeparator("=").join(context.getRuntimeArguments());
  WRAPPERLOGGER.info("Pipeline '{}' is started by user '{}' with arguments {}",
                     context.getApplicationSpecification().getName(),
                     UserGroupInformation.getCurrentUser().getShortUserName(), arguments);
  postActions = new LinkedHashMap<>();
  spec = GSON.fromJson(context.getWorkflowSpecification().getProperty(Constants.PIPELINE_SPEC_KEY),
                       BatchPipelineSpec.class);
  MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(context.getToken(), context.getRuntimeArguments(),
                                                            context.getLogicalStartTime(), context,
                                                            context.getNamespace());
  PluginContext pluginContext = new PipelinePluginContext(context, workflowMetrics,
                                                          spec.isStageLoggingEnabled(),
                                                          spec.isProcessTimingEnabled());
  // Instantiate the ending (post-action) plugins with macros evaluated.
  for (ActionSpec actionSpec : spec.getEndingActions()) {
    postActions.put(actionSpec.getName(),
                    (PostAction) pluginContext.newPluginInstance(actionSpec.getName(), macroEvaluator));
  }
  WRAPPERLOGGER.info("Pipeline '{}' running", context.getApplicationSpecification().getName());
}
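As in ETLWorkflow, the evaluator lets post-action properties reference runtime arguments and, presumably, secure store entries. A small illustration; the argument name is hypothetical and support for the secure macro function is an assumption:
  // Given runtime arguments such as {"notify.email" -> "ops@example.com"}:
  String recipient = macroEvaluator.lookup("notify.email");             // resolved from runtime arguments
  String smtpPass = macroEvaluator.evaluate("secure", "smtp-password"); // assumed secure-store lookup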
Use of co.cask.cdap.etl.common.DefaultMacroEvaluator in project cdap by caskdata.
The class MapReduceTransformExecutorFactory, method getTransformation:
@SuppressWarnings("unchecked")
@Override
protected TrackedTransform getTransformation(StageInfo stageInfo) throws Exception {
  DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(taskContext.getWorkflowToken(),
                                                                   taskContext.getRuntimeArguments(),
                                                                   taskContext.getLogicalStartTime(),
                                                                   taskContext, taskContext.getNamespace());
  String stageName = stageInfo.getName();
  String pluginType = stageInfo.getPluginType();
  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
  if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
    BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
    batchAggregator.initialize(runtimeContext);
    // In the map phase the aggregator emits group keys; in the reduce phase it aggregates each group.
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, taskContext.getDataTracer(stageName));
    } else {
      return getTrackedAggregateStep(
        new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, taskContext.getDataTracer(stageName));
    }
  } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
    BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
    batchJoiner.initialize(runtimeContext);
    if (isMapPhase) {
      return getTrackedEmitKeyStep(
        new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName),
        stageMetrics, taskContext.getDataTracer(stageName));
    } else {
      return getTrackedMergeStep(
        new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName,
                                        runtimeContext.getInputSchemas().size()),
        stageMetrics, taskContext.getDataTracer(stageName));
    }
  }
  Transformation transformation = getInitializedTransformation(stageInfo);
  // Limit records from a batch source in the map phase when the stage's data tracer is enabled (preview).
  boolean isLimitingSource =
    taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
  return new TrackedTransform(isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview)
                                               : transformation,
                              stageMetrics, taskContext.getDataTracer(stageName));
}
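Every branch above returns a TrackedTransform, which wraps a plain Transformation with stage metrics and the stage's DataTracer. For reference, a Transformation is just the record-to-emitter shape sketched below; this identity pass-through and its record types are chosen only for illustration:
  // Minimal sketch of the Transformation shape that TrackedTransform wraps.
  Transformation<StructuredRecord, StructuredRecord> identity =
    new Transformation<StructuredRecord, StructuredRecord>() {
      @Override
      public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
        emitter.emit(input); // TrackedTransform would count/trace records around calls like this
      }
    };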