use of co.cask.cdap.api.data.DatasetContext in project cdap by caskdata.
the class SparkRuntimeService method destroy.
/**
 * Invokes the teardown callback of the Spark program: {@code destroy()} when the program implements
 * {@link ProgramLifecycle}, {@code onFinish(...)} otherwise.
 *
 * @param state the program state to set on the context right before the callback is invoked
 * @throws Exception if the callback, or the transactional execution wrapping it, throws
 */
private void destroy(final ProgramState state) throws Exception {
  final boolean hasLifecycle = spark instanceof ProgramLifecycle;

  TxRunnable destroyTask = new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      // Switch the context classloader to a SparkClassLoader for the duration of the callback;
      // the returned handle is cancelled in the finally block (presumably restoring the previous one).
      Cancellable classLoaderHandle =
        SparkRuntimeUtils.setContextClassLoader(new SparkClassLoader(runtimeContext));
      try {
        context.setState(state);
        if (!hasLifecycle) {
          // onFinish receives whether the program completed successfully.
          spark.onFinish(state.getStatus() == ProgramStatus.COMPLETED, context);
        } else {
          ((ProgramLifecycle) spark).destroy();
        }
      } finally {
        classLoaderHandle.cancel();
      }
    }
  };

  // Only a ProgramLifecycle implementation is consulted for its transaction control on "destroy";
  // everything else is treated as IMPLICIT.
  TransactionControl txControl = TransactionControl.IMPLICIT;
  if (hasLifecycle) {
    txControl = Transactions.getTransactionControl(TransactionControl.IMPLICIT, Spark.class, spark, "destroy");
  }

  if (txControl == TransactionControl.IMPLICIT) {
    // Implicit control: let the context run the callback through execute().
    context.execute(destroyTask);
    return;
  }
  // Otherwise call the callback directly, without going through execute().
  destroyTask.run(context);
}
use of co.cask.cdap.api.data.DatasetContext in project cdap by caskdata.
the class SparkStreamingPipelineDriver method run.
/**
 * Deserializes the pipeline spec from the program specification, assembles the pipeline phase from its
 * stages, resolves the checkpoint directory (unless checkpoints are disabled), then starts the streaming
 * context and blocks until it terminates.
 *
 * @param sec the Spark execution context of this program
 * @throws Exception if building or running the streaming pipeline fails
 */
@Override
public void run(final JavaSparkExecutionContext sec) throws Exception {
  String specJson = sec.getSpecification().getProperty(Constants.PIPELINEID);
  final DataStreamsPipelineSpec pipelineSpec = GSON.fromJson(specJson, DataStreamsPipelineSpec.class);

  PipelinePhase.Builder phaseBuilder = PipelinePhase.builder(SUPPORTED_PLUGIN_TYPES)
    .addConnections(pipelineSpec.getConnections());
  for (StageSpec stage : pipelineSpec.getStages()) {
    phaseBuilder.addStage(
      StageInfo.builder(stage.getName(), stage.getPlugin().getType())
        .addInputs(stage.getInputs())
        .addOutputs(stage.getOutputs())
        .addInputSchemas(stage.getInputSchemas())
        .setOutputSchema(stage.getOutputSchema())
        .setErrorSchema(stage.getErrorSchema())
        .setStageLoggingEnabled(pipelineSpec.isStageLoggingEnabled())
        .setProcessTimingEnabled(pipelineSpec.isProcessTimingEnabled())
        .build());
  }
  final PipelinePhase pipelinePhase = phaseBuilder.build();

  // Stays null when checkpoints are disabled.
  String checkpointDir = null;
  if (!pipelineSpec.isCheckpointsDisabled()) {
    // Checkpoint location is <fileset base>/<application name>/<configured checkpoint directory>.
    String appName = sec.getApplicationSpecification().getName();
    String configuredDir = pipelineSpec.getCheckpointDirectory();

    // The fileset can only be instantiated inside a TxRunnable, so its base location is handed
    // back out through a reference.
    final AtomicReference<Location> baseLocationRef = new AtomicReference<>();
    Transactionals.execute(sec, new TxRunnable() {
      @Override
      public void run(DatasetContext datasetContext) throws Exception {
        FileSet checkpoints = datasetContext.getDataset(DataStreamsApp.CHECKPOINT_FILESET);
        baseLocationRef.set(checkpoints.getBaseLocation());
      }
    }, Exception.class);

    Location baseLocation = baseLocationRef.get();
    checkpointDir = baseLocation.append(appName).append(configuredDir).toURI().toString();
  }

  JavaStreamingContext jssc = run(pipelineSpec, pipelinePhase, sec, checkpointDir);
  jssc.start();

  boolean terminated = false;
  try {
    // Most programs simply run forever. When CDAP stops the program this wait is interrupted,
    // and the context must then be stopped explicitly or the program would hang.
    terminated = jssc.awaitTerminationOrTimeout(Long.MAX_VALUE);
  } finally {
    if (!terminated) {
      jssc.stop(true, pipelineSpec.isStopGracefully());
    }
  }
}
use of co.cask.cdap.api.data.DatasetContext in project cdap by caskdata.
the class AppWithCustomTx method attemptNestedTransaction.
/**
 * Attempts to start a transaction through the given {@link Transactional}. The attempt is expected to
 * fail; if and only if it does, the value {@code FAILED} is written to the capture table under the
 * given row and key, for the test case to validate.
 *
 * <p>If the transaction unexpectedly succeeds, an error is logged and {@code FAILED} is not written,
 * so that the unexpected success stays visible to the validating test instead of being overwritten.
 *
 * @param txnl the transactional to attempt the transaction on; it is cast to {@link DatasetContext}
 *             to obtain the capture table
 * @param row the row to record the outcome under
 * @param key the column key to record the outcome under
 * @throws RuntimeException if executing the transaction throws a runtime exception whose cause is not
 *                          a {@link TransactionFailureException}
 */
static void attemptNestedTransaction(Transactional txnl, final String row, final String key) {
  try {
    txnl.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext ctext) throws Exception {
        recordTransaction(ctext, row, key);
      }
    });
    LOG.error("Nested transaction should not have succeeded for {}:{}", row, key);
    // The transaction did not fail, so FAILED must not be recorded for this row/key.
    return;
  } catch (TransactionFailureException e) {
    // expected: starting nested transaction should fail
    LOG.info("Nested transaction failed as expected for {}:{}", row, key);
  } catch (RuntimeException e) {
    // TODO (CDAP-6837): this is needed because worker's execute() propagates the tx failure as a runtime exception
    if (!(e.getCause() instanceof TransactionFailureException)) {
      throw e;
    }
    // expected: starting nested transaction should fail
    LOG.info("Nested transaction failed as expected for {}:{}", row, key);
  }
  // we know that the transactional is a program context and hence implement DatasetContext
  TransactionCapturingTable capture = ((DatasetContext) txnl).getDataset(CAPTURE);
  capture.getTable().put(new Put(row, key, FAILED));
}
use of co.cask.cdap.api.data.DatasetContext in project cdap by caskdata.
the class CharCountProgram method run.
/**
 * Reads the "keys" dataset, maps every value to the byte encoding of its string length, and then,
 * inside a single {@code sec.execute} call, adds the number of records to the "totals" table and saves
 * the lengths to the "count" dataset.
 *
 * @param sec the Spark execution context of this program
 * @throws Exception if the codec check, the Spark computation or the dataset operations fail
 */
@Override
public void run(final JavaSparkExecutionContext sec) throws Exception {
  JavaSparkContext sparkContext = new JavaSparkContext();

  // Verify the codec is being set
  String configuredCodec = sparkContext.getConf().get("spark.io.compression.codec");
  Preconditions.checkArgument("org.apache.spark.io.LZFCompressionCodec".equals(configuredCodec));

  // read the dataset
  JavaPairRDD<byte[], String> keyedStrings = sec.fromDataset("keys");

  // same key, new value: the length of the original string, encoded as bytes
  final JavaPairRDD<byte[], byte[]> stringLengths =
    keyedStrings.mapToPair(new PairFunction<Tuple2<byte[], String>, byte[], byte[]>() {
      @Override
      public Tuple2<byte[], byte[]> call(Tuple2<byte[], String> entry) throws Exception {
        byte[] rowKey = entry._1();
        int length = entry._2().length();
        return new Tuple2<>(rowKey, Bytes.toBytes(length));
      }
    });

  sec.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      // write a total count to a table (that emits a metric we can validate in the test case)
      long recordCount = stringLengths.count();
      Table totalsTable = datasetContext.getDataset("totals");
      totalsTable.increment(new Increment("total").add("total", recordCount));

      // write the character count to dataset
      sec.saveAsDataset(stringLengths, "count");
    }
  });
}
use of co.cask.cdap.api.data.DatasetContext in project cdap by caskdata.
the class SparkCSVToSpaceProgram method run.
/**
 * Reads CSV lines from the input path of the CSV fileset, replaces every comma with a space, and writes
 * the converted lines to the output path of the same fileset from within {@code sec.execute}.
 *
 * <p>The input path is taken from the runtime argument {@code input.path}, the output path from
 * {@code output.path}. A {@code num.lines} metric is incremented once per converted line.
 *
 * @param sec the Spark execution context of this program
 * @throws Exception if reading the input, or writing or closing the output, fails
 */
@Override
public void run(final JavaSparkExecutionContext sec) throws Exception {
  // NOTE(review): the context is never referenced again; presumably it has to be created for
  // sec.fromDataset() to work, so the construction is kept as-is.
  JavaSparkContext jsc = new JavaSparkContext();
  Map<String, String> fileSetArgs = new HashMap<>();
  final Metrics metrics = sec.getMetrics();
  FileSetArguments.addInputPath(fileSetArgs, sec.getRuntimeArguments().get("input.path"));
  JavaPairRDD<LongWritable, Text> input = sec.fromDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, fileSetArgs);
  final List<String> converted = input.values().map(new Function<Text, String>() {
    @Override
    public String call(Text input) throws Exception {
      String line = input.toString();
      metrics.count("num.lines", 1);
      // Literal single-character substitution; no regular expression needed.
      return line.replace(',', ' ');
    }
  }).collect();
  sec.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      Map<String, String> args = sec.getRuntimeArguments();
      String outputPath = args.get("output.path");
      Map<String, String> fileSetArgs = new HashMap<>();
      FileSetArguments.setOutputPath(fileSetArgs, outputPath);
      FileSet fileSet = context.getDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, fileSetArgs);
      // A BufferedWriter (unlike PrintWriter) propagates IOExceptions from write and close, so a failed
      // write fails this run instead of passing silently. The charset is explicit so the output does not
      // depend on the platform default. Types are fully qualified to keep this method free of new imports.
      try (java.io.BufferedWriter writer = new java.io.BufferedWriter(
        new java.io.OutputStreamWriter(fileSet.getOutputLocation().getOutputStream(),
                                       java.nio.charset.StandardCharsets.UTF_8))) {
        for (String line : converted) {
          writer.write(line);
          // Platform line separator, same as PrintWriter.println().
          writer.newLine();
        }
      }
    }
  });
}
Aggregations