use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.
the class DefaultStore method setStop.
@Override
public void setStop(final ProgramId id, final String pid, final long endTime,
                    final ProgramRunStatus runStatus, final BasicThrowable failureCause) {
  Preconditions.checkArgument(runStatus != null, "Run state of program run should be defined");
  Transactions.executeUnchecked(transactional, new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      AppMetadataStore metaStore = getAppMetadataStore(context);
      metaStore.recordProgramStop(id, pid, endTime, runStatus, failureCause);
      // This block has been added so that completed workflow runs can be logged to the workflow dataset
      WorkflowId workflowId = new WorkflowId(id.getParent(), id.getProgram());
      if (id.getType() == ProgramType.WORKFLOW && runStatus == ProgramRunStatus.COMPLETED) {
        recordCompletedWorkflow(metaStore, getWorkflowDataset(context), workflowId, pid);
      }
      // todo: delete old history data
    }
  });
}
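The same anonymous-TxRunnable shape can be exercised through the public Transactional.execute API. Below is a minimal, hedged sketch of that pattern: the Transactional instance, the "counters" dataset name, and the package names in the imports are assumptions chosen for illustration, not code from DefaultStore.

// Minimal sketch of the TxRunnable pattern, assuming a Transactional instance and a
// KeyValueTable named "counters" (both illustrative).
import co.cask.cdap.api.Transactional;
import co.cask.cdap.api.TxRunnable;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.DatasetContext;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import org.apache.tephra.TransactionFailureException;

public class CounterUpdater {
  private final Transactional transactional;

  public CounterUpdater(Transactional transactional) {
    this.transactional = transactional;
  }

  // Increments a counter inside one short transaction: every dataset operation
  // performed in run() commits or rolls back as a unit.
  public void increment(final String counterName) throws TransactionFailureException {
    transactional.execute(new TxRunnable() {
      @Override
      public void run(DatasetContext context) throws Exception {
        KeyValueTable counters = context.getDataset("counters");
        byte[] key = Bytes.toBytes(counterName);
        byte[] current = counters.read(key);
        long next = (current == null) ? 1L : Bytes.toLong(current) + 1;
        counters.write(key, Bytes.toBytes(next));
      }
    });
  }
}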
use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.
the class ETLWorker method run.
@Override
public void run() {
  final SourceState currentState = new SourceState();
  final SourceState nextState = new SourceState();
  final Map<String, List<Object>> dataToSink = new HashMap<>();
  boolean hasData = false;
  final Map<String, List<InvalidEntry>> transformIdToErrorRecords = intializeTransformIdToErrorsList();
  final WorkerContext context = getContext();
  Set<String> transformErrorsWithoutDataset = Sets.newHashSet();
  // Fetch SourceState from the State Table.
  // Only required at the beginning since we persist the state whenever it changes.
  Transactionals.execute(context, new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
      byte[] stateBytes = stateTable.read(stateStoreKeyBytes);
      if (stateBytes != null) {
        SourceState state = GSON.fromJson(Bytes.toString(stateBytes), SourceState.class);
        currentState.setState(state);
      }
    }
  });
  DefaultEmitter<Object> sourceEmitter = new DefaultEmitter<>();
  TrackedEmitter<Object> trackedSourceEmitter =
    new TrackedEmitter<>(sourceEmitter, new DefaultStageMetrics(metrics, sourceStageName),
                         TrackedTransform.RECORDS_OUT, context.getDataTracer(sourceStageName));
  while (!stopped) {
    // Invoke the poll method of the source to fetch data
    try {
      SourceState newState = source.poll(trackedSourceEmitter, new SourceState(currentState));
      if (newState != null) {
        nextState.setState(newState);
      }
    } catch (Exception e) {
      // Continue since the source threw an exception. No point in processing records, and the state is not changed.
      LOG.warn("Exception thrown during polling of Source for data", e);
      sourceEmitter.reset();
      continue;
    }
    // Run each record emitted by the source through the transform executor and collect
    // the transformed records and error records to be persisted in the sink.
    for (Object sourceData : sourceEmitter.getEntries()) {
      try {
        TransformResponse transformResponse = transformExecutor.runOneIteration(sourceData);
        for (Map.Entry<String, Collection<Object>> transformedValues : transformResponse.getSinksResults().entrySet()) {
          dataToSink.put(transformedValues.getKey(), new ArrayList<>());
          Iterator emitterIterator = transformedValues.getValue().iterator();
          while (emitterIterator.hasNext()) {
            if (!hasData) {
              hasData = true;
            }
            dataToSink.get(transformedValues.getKey()).add(emitterIterator.next());
          }
        }
        for (Map.Entry<String, Collection<InvalidEntry<Object>>> transformErrorsEntry : transformResponse.getMapTransformIdToErrorEmitter().entrySet()) {
          if (!transformErrorsWithoutDataset.contains(transformErrorsEntry.getKey())) {
            if (!tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
              transformErrorsWithoutDataset.add(transformErrorsEntry.getKey());
              LOG.warn("Error records were emitted in transform {}, " +
                         "but error dataset is not configured for this transform", transformErrorsEntry.getKey());
            }
            if (tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
              // add the errors
              if (!hasData && transformErrorsEntry.getValue().size() > 0) {
                hasData = true;
              }
              transformIdToErrorRecords.get(transformErrorsEntry.getKey()).addAll(transformErrorsEntry.getValue());
            }
          }
        }
      } catch (Exception e) {
        LOG.warn("Exception thrown while processing data {}", sourceData, e);
      }
    }
    sourceEmitter.reset();
    // Start a transaction if there is data to persist or if the source state has changed.
    try {
      if (hasData || (!nextState.equals(currentState))) {
        getContext().execute(new TxRunnable() {
          @Override
          public void run(DatasetContext context) throws Exception {
            // Invoke the sink's write method if there is any object to be written.
            if (!dataToSink.isEmpty()) {
              DefaultDataWriter defaultDataWriter = new DefaultDataWriter(getContext(), context);
              for (Map.Entry<String, List<Object>> sinkEntry : dataToSink.entrySet()) {
                sinks.get(sinkEntry.getKey()).write(sinkEntry.getValue(), defaultDataWriter);
              }
            }
            for (Map.Entry<String, List<InvalidEntry>> errorRecordEntry : transformIdToErrorRecords.entrySet()) {
              String transformId = errorRecordEntry.getKey();
              final String datasetName = tranformIdToDatasetName.get(transformId);
              Table errorTable = context.getDataset(datasetName);
              long timeInMillis = System.currentTimeMillis();
              byte[] currentTime = Bytes.toBytes(timeInMillis);
              String transformIdentifier = appName + SEPARATOR + transformId;
              for (InvalidEntry invalidEntry : errorRecordEntry.getValue()) {
                // using a random UUID as we want to write each record uniquely,
                // but we are not concerned about the UUID while scanning later.
                byte[] rowKey = Bytes.concat(currentTime, Bytes.toBytes(transformIdentifier), Bytes.toBytes(UUID.randomUUID()));
                Put errorPut = constructErrorPut(rowKey, invalidEntry, timeInMillis);
                errorTable.write(rowKey, errorPut);
              }
            }
            // Persist nextState if it is different from currentState
            if (!nextState.equals(currentState)) {
              KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
              stateTable.write(stateStoreKey, GSON.toJson(nextState));
            }
            // after running one iteration and successfully writing to sinks and error datasets, reset the emitters.
            transformExecutor.resetEmitter();
          }
        });
        // Update the in-memory copy of the state only if the transaction succeeded.
        currentState.setState(nextState);
      }
    } catch (Exception e) {
      LOG.warn("Exception thrown during persisting of data", e);
    } finally {
      // Clear the buffered sink data. If a transaction failure occurred, we will poll the source with the old state.
      hasData = false;
      dataToSink.clear();
      for (List<InvalidEntry> invalidEntryList : transformIdToErrorRecords.values()) {
        invalidEntryList.clear();
      }
    }
  }
}
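The persist step in that loop boils down to a "buffer outside the transaction, commit atomically, then update in-memory state" pattern. The following is a condensed, hedged sketch of that shape against the public Transactional API; the class name, the dataset name "worker.state", and the key layout are illustrative assumptions, not code from ETLWorker.

// Condensed sketch of the persist step above: buffered records and the new source state
// are written in one transaction, and the caller keeps its old in-memory state if the
// commit fails. Dataset name "worker.state" and the key layout are illustrative.
import co.cask.cdap.api.Transactional;
import co.cask.cdap.api.TxRunnable;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.DatasetContext;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import org.apache.tephra.TransactionFailureException;
import java.util.List;

public class StatefulPersister {
  private final Transactional transactional;
  private final String stateKey;

  public StatefulPersister(Transactional transactional, String stateKey) {
    this.transactional = transactional;
    this.stateKey = stateKey;
  }

  // Returns true only if the commit succeeded, mirroring how ETLWorker updates its
  // in-memory SourceState only after the transaction completes.
  public boolean persist(final List<byte[]> records, final byte[] newState) {
    try {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext context) throws Exception {
          KeyValueTable table = context.getDataset("worker.state");
          long index = 0;
          for (byte[] record : records) {
            table.write(Bytes.toBytes(stateKey + ":record:" + index++), record);
          }
          table.write(Bytes.toBytes(stateKey), newState);
        }
      });
      return true;
    } catch (TransactionFailureException e) {
      // Nothing was committed; the caller re-polls the source with the old state.
      return false;
    }
  }
}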
use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.
the class BodyConsumerAdapter method onError.
/**
 * Calls the {@link HttpContentConsumer#onError(HttpServiceResponder, Throwable)} method from a transaction.
 */
private void onError(final Throwable cause, final DelayedHttpServiceResponder responder) {
  if (completed) {
    return;
  }
  // To the HttpContentConsumer, once onError is called, no other methods will be triggered
  completed = true;
  TransactionControl txCtrl = Transactions.getTransactionControl(
    TransactionControl.IMPLICIT, HttpContentConsumer.class, delegate, "onError",
    HttpServiceResponder.class, Throwable.class);
  try {
    if (TransactionControl.IMPLICIT == txCtrl) {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext context) throws Exception {
          delegate.onError(responder, cause);
        }
      });
    } else {
      delegate.onError(responder, cause);
    }
  } catch (Throwable t) {
    responder.setTransactionFailureResponse(t);
    LOG.warn("Exception in calling HttpContentConsumer.onError", t);
  } finally {
    try {
      responder.execute(false);
    } finally {
      if (!responder.hasContentProducer()) {
        contextReleaser.cancel();
      }
    }
  }
}
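The implicit-vs-explicit branching above is a recurring shape in CDAP runtime code: run the user callback inside a transaction when the resolved transaction control is IMPLICIT, otherwise call it directly. A hedged, generic sketch of that dispatch follows; only Transactional, TxRunnable, DatasetContext, and TransactionControl are CDAP API types, while the class and method names are illustrative.

// Hedged sketch of the implicit/explicit dispatch above.
import co.cask.cdap.api.Transactional;
import co.cask.cdap.api.TxRunnable;
import co.cask.cdap.api.annotation.TransactionControl;
import co.cask.cdap.api.data.DatasetContext;
import org.apache.tephra.TransactionFailureException;

final class LifecycleDispatcher {
  private LifecycleDispatcher() { }

  // Runs the body inside an implicit transaction when txControl is IMPLICIT; otherwise
  // invokes it directly and leaves transaction management to the callee.
  static void dispatch(Transactional transactional, TransactionControl txControl,
                       final Runnable body) throws TransactionFailureException {
    if (TransactionControl.IMPLICIT == txControl) {
      transactional.execute(new TxRunnable() {
        @Override
        public void run(DatasetContext context) throws Exception {
          body.run();
        }
      });
    } else {
      body.run();
    }
  }
}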
use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.
the class SparkRuntimeService method initialize.
/**
 * Calls {@link Spark#beforeSubmit(SparkClientContext)} for pre-3.5 Spark programs;
 * otherwise calls {@link ProgramLifecycle#initialize}.
 */
@SuppressWarnings("unchecked")
private void initialize() throws Exception {
  // AbstractSpark implements the final initialize(context) and requires the subclass to
  // implement initialize(), whereas programs that directly implement Spark have
  // the option to override initialize(context) (if they implement ProgramLifecycle)
  final TransactionControl txControl =
    spark instanceof AbstractSpark
      ? Transactions.getTransactionControl(TransactionControl.IMPLICIT, AbstractSpark.class, spark, "initialize")
      : spark instanceof ProgramLifecycle
        ? Transactions.getTransactionControl(TransactionControl.IMPLICIT, Spark.class, spark,
                                             "initialize", SparkClientContext.class)
        : TransactionControl.IMPLICIT;
  TxRunnable runnable = new TxRunnable() {
    @Override
    public void run(DatasetContext ctxt) throws Exception {
      Cancellable cancellable = SparkRuntimeUtils.setContextClassLoader(new SparkClassLoader(runtimeContext));
      try {
        context.setState(new ProgramState(ProgramStatus.INITIALIZING, null));
        if (spark instanceof ProgramLifecycle) {
          ((ProgramLifecycle) spark).initialize(context);
        } else {
          spark.beforeSubmit(context);
        }
      } finally {
        cancellable.cancel();
      }
    }
  };
  if (TransactionControl.IMPLICIT == txControl) {
    context.execute(runnable);
  } else {
    runnable.run(context);
  }
}
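A detail worth noting in this example is that the same TxRunnable serves both paths: it is either submitted through context.execute or invoked directly with the program context as its DatasetContext. A hedged sketch of that reuse is below; the intersection-typed context is an assumption modeled on the snippet (where the context acts as both Transactional and DatasetContext), and the class and method names are illustrative.

// Hedged variation on the dispatch pattern: one TxRunnable reused on both paths.
import co.cask.cdap.api.Transactional;
import co.cask.cdap.api.TxRunnable;
import co.cask.cdap.api.annotation.TransactionControl;
import co.cask.cdap.api.data.DatasetContext;

final class TxRunnables {
  private TxRunnables() { }

  // Executes the runnable inside an implicit transaction, or directly against the given
  // context when the lifecycle method manages transactions itself.
  static <C extends Transactional & DatasetContext> void runOnce(
      C context, TransactionControl txControl, TxRunnable runnable) throws Exception {
    if (TransactionControl.IMPLICIT == txControl) {
      context.execute(runnable);   // commits or rolls back the dataset operations in run()
    } else {
      runnable.run(context);       // no wrapping transaction; the callee is responsible
    }
  }
}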
use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.
the class FileMetadataCleaner method scanAndGetFilesToDelete.
/**
 * Scans for metadata in the new format that has outlived the log retention period.
 * @param tillTime time till which files will be deleted
 * @param transactionTimeout transaction timeout to use for scanning and deleting entries
 * @return list of DeletedEntry - used to get the files to delete for which metadata has already been deleted
 */
public List<DeletedEntry> scanAndGetFilesToDelete(final long tillTime, final int transactionTimeout) {
  final List<DeletedEntry> toDelete = new ArrayList<>();
  // CDAPLogAppender already checks that transactionTimeout is greater than TX_TIMEOUT_DISCOUNT_SECS.
  final int cutOffTransactionTime = transactionTimeout - TX_TIMEOUT_DISCOUNT_SECS;
  try {
    transactional.execute(transactionTimeout, new TxRunnable() {
      @Override
      public void run(DatasetContext context) throws Exception {
        Table table = LoggingStoreTableUtil.getMetadataTable(context, datasetManager);
        Stopwatch stopwatch = new Stopwatch().start();
        byte[] startRowKey = NEW_ROW_KEY_PREFIX;
        byte[] endRowKey = NEW_ROW_KEY_PREFIX_END;
        boolean reachedEnd = false;
        while (!reachedEnd) {
          try (Scanner scanner = table.scan(startRowKey, endRowKey)) {
            while (stopwatch.elapsedTime(TimeUnit.SECONDS) < cutOffTransactionTime) {
              Row row = scanner.next();
              if (row == null) {
                // scanner.next() returned null, so we have reached the end.
                reachedEnd = true;
                break;
              }
              byte[] rowkey = row.getRow();
              // file creation time is the last 8 bytes of the rowkey in the new format
              long creationTime = Bytes.toLong(rowkey, rowkey.length - Bytes.SIZEOF_LONG, Bytes.SIZEOF_LONG);
              if (creationTime <= tillTime) {
                // expired - can be deleted
                toDelete.add(new DeletedEntry(rowkey, Bytes.toString(row.get(LoggingStoreTableUtil.META_TABLE_COLUMN_KEY))));
              } else {
                // update the start row key based on the logging context and start a new scan.
                startRowKey = Bytes.add(NEW_ROW_KEY_PREFIX, getNextContextStartKey(rowkey));
                break;
              }
            }
          }
        }
      }
    });
  } catch (TransactionFailureException e) {
    LOG.warn("Got Exception while scanning metadata table", e);
    // if there is an exception, no metadata was collected, so file deletion should be skipped.
    return new ArrayList<>();
  }
  if (!toDelete.isEmpty()) {
    // call delete on the old-format metadata whenever there are expired entries to delete in the new format,
    // though the first call will already delete all of the old metadata.
    scanAndDeleteOldMetaData(transactionTimeout, cutOffTransactionTime);
    // delete the metadata entries in toDelete and get the list of file locations
    return deleteNewMetadataEntries(toDelete, transactionTimeout, cutOffTransactionTime);
  }
  // toDelete is empty, safe to return it
  return toDelete;
}
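The explicit-timeout overload used above (transactional.execute(transactionTimeout, ...)) pairs naturally with a scan that stops short of the timeout so the commit still lands inside the transaction window. Below is a hedged sketch of that shape; the dataset name "scan.target", the five-second safety margin, and the class name are illustrative assumptions rather than FileMetadataCleaner code.

// Minimal sketch of a time-bounded scan inside a transaction with an explicit timeout.
import co.cask.cdap.api.Transactional;
import co.cask.cdap.api.TxRunnable;
import co.cask.cdap.api.data.DatasetContext;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scanner;
import co.cask.cdap.api.dataset.table.Table;
import org.apache.tephra.TransactionFailureException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

public class BoundedScanner {
  private final Transactional transactional;

  public BoundedScanner(Transactional transactional) {
    this.transactional = transactional;
  }

  // Collects row keys within a single transaction, but stops scanning a few seconds
  // before the transaction timeout so the commit still happens inside the timeout window.
  public List<byte[]> scanKeys(final byte[] startKey, final byte[] stopKey,
                               int txTimeoutSeconds) throws TransactionFailureException {
    final List<byte[]> keys = new ArrayList<>();
    final long budgetMillis = TimeUnit.SECONDS.toMillis(Math.max(1, txTimeoutSeconds - 5));
    transactional.execute(txTimeoutSeconds, new TxRunnable() {
      @Override
      public void run(DatasetContext context) throws Exception {
        Table table = context.getDataset("scan.target");
        long deadline = System.currentTimeMillis() + budgetMillis;
        try (Scanner scanner = table.scan(startKey, stopKey)) {
          Row row;
          while (System.currentTimeMillis() < deadline && (row = scanner.next()) != null) {
            keys.add(row.getRow());
          }
        }
      }
    });
    return keys;
  }
}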