Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class SparkStreamingPipelineDriver, method run.
/**
 * Builds the streaming pipeline from the program specification, starts it, and blocks until the
 * streaming context terminates.
 *
 * <p>The pipeline spec is deserialized from the {@code Constants.PIPELINEID} property of the program
 * specification. Unless the spec disables checkpoints, a checkpoint directory URI is resolved and
 * handed to the streaming context construction; otherwise {@code null} is passed.
 *
 * @param sec the Spark execution context of this program
 * @throws Exception if setting up the pipeline fails, or if waiting for termination is cut short
 */
@Override
public void run(final JavaSparkExecutionContext sec) throws Exception {
  String specJson = sec.getSpecification().getProperty(Constants.PIPELINEID);
  final DataStreamsPipelineSpec pipelineSpec = GSON.fromJson(specJson, DataStreamsPipelineSpec.class);
  final PipelinePhase pipelinePhase = PipelinePhase.builder(SUPPORTED_PLUGIN_TYPES)
    .addConnections(pipelineSpec.getConnections())
    .addStages(pipelineSpec.getStages())
    .build();

  // a null checkpoint directory is what gets passed along when checkpointing is turned off
  String checkpointDir = null;
  if (!pipelineSpec.isCheckpointsDisabled()) {
    checkpointDir = resolveCheckpointDir(sec, pipelineSpec);
  }

  JavaStreamingContext streamingContext = run(pipelineSpec, pipelinePhase, sec, checkpointDir);
  streamingContext.start();

  boolean terminated = false;
  try {
    // Normally the pipeline runs forever. When CDAP stops the program this wait is interrupted
    // instead, and the context has to be stopped explicitly or the program would hang.
    terminated = streamingContext.awaitTerminationOrTimeout(Long.MAX_VALUE);
  } finally {
    if (!terminated) {
      streamingContext.stop(true, pipelineSpec.isStopGracefully());
    }
  }
}

/**
 * Resolves the checkpoint directory of this pipeline as a URI string: the base location of the
 * checkpoint fileset, followed by the application name and the directory configured in the spec.
 */
private String resolveCheckpointDir(JavaSparkExecutionContext sec,
                                    DataStreamsPipelineSpec pipelineSpec) throws Exception {
  String appName = sec.getApplicationSpecification().getName();
  String relativeDir = pipelineSpec.getCheckpointDirectory();

  // The fileset can only be instantiated inside a TxRunnable, so its base location is
  // handed back out of the transaction through a reference.
  final AtomicReference<Location> baseLocation = new AtomicReference<>();
  Transactionals.execute(sec, new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      FileSet checkpoints = context.getDataset(DataStreamsApp.CHECKPOINT_FILESET);
      baseLocation.set(checkpoints.getBaseLocation());
    }
  }, Exception.class);

  Location pipelineDir = baseLocation.get().append(appName).append(relativeDir);
  return pipelineDir.toURI().toString();
}
Use of co.cask.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class ETLBatchApplication, method configure.
/**
 * Configures the ETL batch application.
 *
 * <p>Generates the pipeline spec from the (converted) application config, checks that it has exactly
 * one batch source and plans into exactly one phase, registers the MapReduce or Spark program that
 * matches the configured engine, and finally adds the workflow together with its time schedule.
 *
 * @throws IllegalArgumentException if the pipeline does not have exactly one source, does not plan
 *                                  into a single phase, or names an unsupported execution engine
 */
@Override
public void configure() {
  ETLBatchConfig config = getConfig().convertOldConfig();
  setDescription(DEFAULT_DESCRIPTION);

  BatchPipelineSpec spec = new BatchPipelineSpecGenerator<>(getConfigurer(),
                                                            ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                                            ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                                            config.getEngine())
    .generateSpec(config);

  // count the stages whose plugin is a batch source
  int sources = 0;
  for (StageSpec stage : spec.getStages()) {
    String pluginType = stage.getPlugin().getType();
    if (BatchSource.PLUGIN_TYPE.equals(pluginType)) {
      sources += 1;
    }
  }
  if (sources != 1) {
    throw new IllegalArgumentException("Invalid pipeline. There must only be one source.");
  }

  // every plugin type set other than the supported types is left empty for this planner
  ImmutableSet<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(SUPPORTED_PLUGIN_TYPES, noTypes, noTypes, noTypes, noTypes);
  PipelinePlan plan = planner.plan(spec);
  if (plan.getPhases().size() != 1) {
    // should never happen if there is only one source
    throw new IllegalArgumentException("There was an error planning the pipeline. There should only be one phase.");
  }
  PipelinePhase pipeline = plan.getPhases().values().iterator().next();

  switch (config.getEngine()) {
    case MAPREDUCE:
      addMapReduce(new ETLMapReduce(createPhaseSpec(ETLMapReduce.NAME, pipeline, config)));
      break;
    case SPARK:
      addSpark(new ETLSpark(createPhaseSpec(ETLSpark.class.getSimpleName(), pipeline, config)));
      break;
    default:
      throw new IllegalArgumentException(String.format("Invalid execution engine '%s'. Must be one of %s.", config.getEngine(), Joiner.on(',').join(Engine.values())));
  }

  addWorkflow(new ETLWorkflow(spec, config.getEngine()));
  schedule(buildSchedule(SCHEDULE_NAME, ProgramType.WORKFLOW, ETLWorkflow.NAME)
             .setDescription("ETL Batch schedule")
             .triggerByTime(config.getSchedule()));
}

/**
 * Creates the phase spec for the single pipeline phase, run under the given program name with the
 * resource, logging, timing, preview and property settings taken from the config.
 */
private BatchPhaseSpec createPhaseSpec(String programName, PipelinePhase pipeline, ETLBatchConfig config) {
  return new BatchPhaseSpec(programName, pipeline,
                            config.getResources(), config.getDriverResources(), config.getClientResources(),
                            config.isStageLoggingEnabled(), config.isProcessTimingEnabled(),
                            new HashMap<String, String>(),
                            config.getNumOfRecordsPreview(), config.getProperties(), false);
}
Aggregations