Example 6 with TxRunnable

use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.

the class DefaultStore method setStop.

@Override
public void setStop(final ProgramId id, final String pid, final long endTime, final ProgramRunStatus runStatus, final BasicThrowable failureCause) {
    Preconditions.checkArgument(runStatus != null, "Run state of program run should be defined");
    Transactions.executeUnchecked(transactional, new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            AppMetadataStore metaStore = getAppMetadataStore(context);
            metaStore.recordProgramStop(id, pid, endTime, runStatus, failureCause);
            // This block has been added so that completed workflow runs can be logged to the workflow dataset
            WorkflowId workflowId = new WorkflowId(id.getParent(), id.getProgram());
            if (id.getType() == ProgramType.WORKFLOW && runStatus == ProgramRunStatus.COMPLETED) {
                recordCompletedWorkflow(metaStore, getWorkflowDataset(context), workflowId, pid);
            }
        // todo: delete old history data
        }
    });
}
Also used: TxRunnable (co.cask.cdap.api.TxRunnable), DatasetContext (co.cask.cdap.api.data.DatasetContext), WorkflowId (co.cask.cdap.proto.id.WorkflowId), TransactionFailureException (org.apache.tephra.TransactionFailureException), ProgramNotFoundException (co.cask.cdap.common.ProgramNotFoundException), ApplicationNotFoundException (co.cask.cdap.common.ApplicationNotFoundException), TransactionNotInProgressException (org.apache.tephra.TransactionNotInProgressException), TransactionConflictException (org.apache.tephra.TransactionConflictException), DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException), NoSuchElementException (java.util.NoSuchElementException), IOException (java.io.IOException)
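
Since TxRunnable is a single-method interface, the same transactional block can be written as a lambda on Java 8 or later. A minimal sketch of an equivalent call, reusing the fields of DefaultStore shown above (the recordCompletedWorkflow branch is omitted for brevity):

Transactions.executeUnchecked(transactional, (DatasetContext context) -> {
    AppMetadataStore metaStore = getAppMetadataStore(context);
    metaStore.recordProgramStop(id, pid, endTime, runStatus, failureCause);
});

The executeUnchecked variant propagates transaction failures as unchecked exceptions, which is why setStop needs no try/catch around the call.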

Example 7 with TxRunnable

use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.

the class ETLWorker method run.

@Override
public void run() {
    final SourceState currentState = new SourceState();
    final SourceState nextState = new SourceState();
    final Map<String, List<Object>> dataToSink = new HashMap<>();
    boolean hasData = false;
    final Map<String, List<InvalidEntry>> transformIdToErrorRecords = intializeTransformIdToErrorsList();
    final WorkerContext context = getContext();
    Set<String> transformErrorsWithoutDataset = Sets.newHashSet();
    // Fetch SourceState from State Table.
    // Only required at the beginning since we persist the state if there is a change.
    Transactionals.execute(context, new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
            byte[] stateBytes = stateTable.read(stateStoreKeyBytes);
            if (stateBytes != null) {
                SourceState state = GSON.fromJson(Bytes.toString(stateBytes), SourceState.class);
                currentState.setState(state);
            }
        }
    });
    DefaultEmitter<Object> sourceEmitter = new DefaultEmitter<>();
    TrackedEmitter<Object> trackedSourceEmitter = new TrackedEmitter<>(sourceEmitter, new DefaultStageMetrics(metrics, sourceStageName), TrackedTransform.RECORDS_OUT, context.getDataTracer(sourceStageName));
    while (!stopped) {
        // Invoke poll method of the source to fetch data
        try {
            SourceState newState = source.poll(trackedSourceEmitter, new SourceState(currentState));
            if (newState != null) {
                nextState.setState(newState);
            }
        } catch (Exception e) {
            // Continue since the source threw an exception. No point in processing records and state is not changed.
            LOG.warn("Exception thrown during polling of Source for data", e);
            sourceEmitter.reset();
            continue;
        }
        // Run each record emitted by the source through the transform executor
        // and collect the transformed results to be persisted in the sink.
        for (Object sourceData : sourceEmitter.getEntries()) {
            try {
                TransformResponse transformResponse = transformExecutor.runOneIteration(sourceData);
                for (Map.Entry<String, Collection<Object>> transformedValues : transformResponse.getSinksResults().entrySet()) {
                    dataToSink.put(transformedValues.getKey(), new ArrayList<>());
                    Iterator emitterIterator = transformedValues.getValue().iterator();
                    while (emitterIterator.hasNext()) {
                        if (!hasData) {
                            hasData = true;
                        }
                        dataToSink.get(transformedValues.getKey()).add(emitterIterator.next());
                    }
                }
                for (Map.Entry<String, Collection<InvalidEntry<Object>>> transformErrorsEntry : transformResponse.getMapTransformIdToErrorEmitter().entrySet()) {
                    if (!transformErrorsWithoutDataset.contains(transformErrorsEntry.getKey())) {
                        if (!tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
                            transformErrorsWithoutDataset.add(transformErrorsEntry.getKey());
                            LOG.warn("Error records were emitted in transform {}, " + "but error dataset is not configured for this transform", transformErrorsEntry.getKey());
                        }
                        if (tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
                            // add the errors
                            if (!hasData && transformErrorsEntry.getValue().size() > 0) {
                                hasData = true;
                            }
                            transformIdToErrorRecords.get(transformErrorsEntry.getKey()).addAll(transformErrorsEntry.getValue());
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("Exception thrown while processing data {}", sourceData, e);
            }
        }
        sourceEmitter.reset();
        // Start a Transaction if there is data to persist or if the Source state has changed.
        try {
            if (hasData || (!nextState.equals(currentState))) {
                getContext().execute(new TxRunnable() {

                    @Override
                    public void run(DatasetContext context) throws Exception {
                        // Invoke the sink's write method if there is any object to be written.
                        if (!dataToSink.isEmpty()) {
                            DefaultDataWriter defaultDataWriter = new DefaultDataWriter(getContext(), context);
                            for (Map.Entry<String, List<Object>> sinkEntry : dataToSink.entrySet()) {
                                sinks.get(sinkEntry.getKey()).write(sinkEntry.getValue(), defaultDataWriter);
                            }
                        }
                        for (Map.Entry<String, List<InvalidEntry>> errorRecordEntry : transformIdToErrorRecords.entrySet()) {
                            String transformId = errorRecordEntry.getKey();
                            final String datasetName = tranformIdToDatasetName.get(transformId);
                            Table errorTable = context.getDataset(datasetName);
                            long timeInMillis = System.currentTimeMillis();
                            byte[] currentTime = Bytes.toBytes(timeInMillis);
                            String transformIdentifier = appName + SEPARATOR + transformId;
                            for (InvalidEntry invalidEntry : errorRecordEntry.getValue()) {
                                // using random uuid as we want to write each record uniquely,
                                // but we are not concerned about the uuid while scanning later.
                                byte[] rowKey = Bytes.concat(currentTime, Bytes.toBytes(transformIdentifier), Bytes.toBytes(UUID.randomUUID()));
                                Put errorPut = constructErrorPut(rowKey, invalidEntry, timeInMillis);
                                errorTable.write(rowKey, errorPut);
                            }
                        }
                        // Persist nextState if it is different from currentState
                        if (!nextState.equals(currentState)) {
                            KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
                            stateTable.write(stateStoreKey, GSON.toJson(nextState));
                        }
                        // after running one iteration and successfully writing to sinks and error datasets, reset the emitters.
                        transformExecutor.resetEmitter();
                    }
                });
                // Update the in-memory copy of the state only if the transaction succeeded.
                currentState.setState(nextState);
            }
        } catch (Exception e) {
            LOG.warn("Exception thrown during persisting of data", e);
        } finally {
            // Clear the collected sink data (if the transaction failed, we will poll the source again with the old state)
            hasData = false;
            dataToSink.clear();
            for (List<InvalidEntry> invalidEntryList : transformIdToErrorRecords.values()) {
                invalidEntryList.clear();
            }
        }
    }
}
Also used: DefaultEmitter (co.cask.cdap.etl.common.DefaultEmitter), HashMap (java.util.HashMap), InvalidEntry (co.cask.cdap.etl.api.InvalidEntry), TxRunnable (co.cask.cdap.api.TxRunnable), TrackedEmitter (co.cask.cdap.etl.common.TrackedEmitter), CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator), Iterator (java.util.Iterator), List (java.util.List), ArrayList (java.util.ArrayList), DatasetContext (co.cask.cdap.api.data.DatasetContext), SourceState (co.cask.cdap.etl.api.realtime.SourceState), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), Table (co.cask.cdap.api.dataset.table.Table), IOException (java.io.IOException), Put (co.cask.cdap.api.dataset.table.Put), Collection (java.util.Collection), TransformResponse (co.cask.cdap.etl.common.TransformResponse), WorkerContext (co.cask.cdap.api.worker.WorkerContext), Map (java.util.Map), DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)
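
The initial state fetch above is a compact illustration of the idiom: a one-off TxRunnable reads a dataset and copies the result into a holder captured by the closure. A minimal sketch of the same read as a Java 8 lambda, reusing the names from the method above:

Transactionals.execute(context, (DatasetContext dsContext) -> {
    KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE);
    byte[] stateBytes = stateTable.read(stateStoreKeyBytes);
    if (stateBytes != null) {
        // copy the persisted state into the effectively final holder captured by the lambda
        currentState.setState(GSON.fromJson(Bytes.toString(stateBytes), SourceState.class));
    }
});

Note the asymmetry in the method: Transactionals.execute rethrows transaction failures unchecked, while the getContext().execute(...) call used for the write path throws a checked TransactionFailureException, which is why the persist step sits inside its own try/catch.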

Example 8 with TxRunnable

use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.

the class BodyConsumerAdapter method onError.

/**
   * Calls the {@link HttpContentConsumer#onError(HttpServiceResponder, Throwable)} method from a transaction.
   */
private void onError(final Throwable cause, final DelayedHttpServiceResponder responder) {
    if (completed) {
        return;
    }
    // As far as the HttpContentConsumer is concerned, once onError is called, no other methods will be triggered
    completed = true;
    TransactionControl txCtrl = Transactions.getTransactionControl(TransactionControl.IMPLICIT, HttpContentConsumer.class, delegate, "onError", HttpServiceResponder.class, Throwable.class);
    try {
        if (TransactionControl.IMPLICIT == txCtrl) {
            transactional.execute(new TxRunnable() {

                @Override
                public void run(DatasetContext context) throws Exception {
                    delegate.onError(responder, cause);
                }
            });
        } else {
            delegate.onError(responder, cause);
        }
    } catch (Throwable t) {
        responder.setTransactionFailureResponse(t);
        LOG.warn("Exception in calling HttpContentConsumer.onError", t);
    } finally {
        try {
            responder.execute(false);
        } finally {
            if (!responder.hasContentProducer()) {
                contextReleaser.cancel();
            }
        }
    }
}
Also used: TxRunnable (co.cask.cdap.api.TxRunnable), TransactionControl (co.cask.cdap.api.annotation.TransactionControl), DatasetContext (co.cask.cdap.api.data.DatasetContext)
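
The branch on TransactionControl here is a recurring runtime pattern in these examples: run the body inside a new transaction only when the resolved control is IMPLICIT, otherwise invoke the delegate directly so it can manage transactions itself. A minimal sketch of the pattern as a hypothetical helper (runWithControl is illustrative, not part of CDAP; it assumes a DatasetContext is available for the direct call, as in SparkRuntimeService below):

private static void runWithControl(Transactional transactional, TransactionControl txCtrl,
                                   TxRunnable body, DatasetContext directContext) throws Exception {
    if (TransactionControl.IMPLICIT == txCtrl) {
        // start a transaction and run the body inside it
        transactional.execute(body);
    } else {
        // EXPLICIT control: the body manages its own transactions
        body.run(directContext);
    }
}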

Example 9 with TxRunnable

use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.

the class SparkRuntimeService method initialize.

/**
   * Calls {@link Spark#beforeSubmit(SparkClientContext)} for pre-3.5 Spark programs, and
   * {@link ProgramLifecycle#initialize} otherwise.
   */
@SuppressWarnings("unchecked")
private void initialize() throws Exception {
    // AbstractSpark implements the final initialize(context) and requires subclasses to
    // implement initialize(), whereas programs that directly implement Spark have
    // the option to override initialize(context) (if they implement ProgramLifecycle)
    final TransactionControl txControl = spark instanceof AbstractSpark
        ? Transactions.getTransactionControl(TransactionControl.IMPLICIT, AbstractSpark.class, spark, "initialize")
        : spark instanceof ProgramLifecycle
            ? Transactions.getTransactionControl(TransactionControl.IMPLICIT, Spark.class, spark, "initialize", SparkClientContext.class)
            : TransactionControl.IMPLICIT;
    TxRunnable runnable = new TxRunnable() {

        @Override
        public void run(DatasetContext ctxt) throws Exception {
            Cancellable cancellable = SparkRuntimeUtils.setContextClassLoader(new SparkClassLoader(runtimeContext));
            try {
                context.setState(new ProgramState(ProgramStatus.INITIALIZING, null));
                if (spark instanceof ProgramLifecycle) {
                    ((ProgramLifecycle) spark).initialize(context);
                } else {
                    spark.beforeSubmit(context);
                }
            } finally {
                cancellable.cancel();
            }
        }
    };
    if (TransactionControl.IMPLICIT == txControl) {
        context.execute(runnable);
    } else {
        runnable.run(context);
    }
}
Also used: ProgramLifecycle (co.cask.cdap.api.ProgramLifecycle), TxRunnable (co.cask.cdap.api.TxRunnable), Cancellable (org.apache.twill.common.Cancellable), TransactionControl (co.cask.cdap.api.annotation.TransactionControl), ProgramState (co.cask.cdap.api.ProgramState), DatasetContext (co.cask.cdap.api.data.DatasetContext), AbstractSpark (co.cask.cdap.api.spark.AbstractSpark)
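
On the program side, the TransactionControl that Transactions.getTransactionControl resolves typically comes from an annotation on the lifecycle method. A minimal sketch, assuming CDAP's @TransactionPolicy annotation (co.cask.cdap.api.annotation.TransactionPolicy) and a hypothetical MySparkMain class:

import co.cask.cdap.api.annotation.TransactionControl;
import co.cask.cdap.api.annotation.TransactionPolicy;
import co.cask.cdap.api.spark.AbstractSpark;

public class MySpark extends AbstractSpark {

    @Override
    protected void configure() {
        setMainClass(MySparkMain.class);  // hypothetical Spark main class
    }

    @Override
    @TransactionPolicy(TransactionControl.EXPLICIT)
    protected void initialize() throws Exception {
        // with EXPLICIT control, SparkRuntimeService calls runnable.run(context) directly
        // instead of context.execute(runnable), so no transaction is open here
    }
}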

Example 10 with TxRunnable

use of co.cask.cdap.api.TxRunnable in project cdap by caskdata.

the class FileMetadataCleaner method scanAndGetFilesToDelete.

/**
   * Scans for metadata in the new format whose log retention has expired.
   * @param tillTime time till which files will be deleted
   * @param transactionTimeout transaction timeout to use for scanning and deleting entries
   * @return list of {@link DeletedEntry}, used to get the files to delete whose metadata has already been deleted
   */
public List<DeletedEntry> scanAndGetFilesToDelete(final long tillTime, final int transactionTimeout) {
    final List<DeletedEntry> toDelete = new ArrayList<>();
    // the check in CDAPLogAppender ensures that transactionTimeout is greater than TX_TIMEOUT_DISCOUNT_SECS.
    final int cutOffTransactionTime = transactionTimeout - TX_TIMEOUT_DISCOUNT_SECS;
    try {
        transactional.execute(transactionTimeout, new TxRunnable() {

            @Override
            public void run(DatasetContext context) throws Exception {
                Table table = LoggingStoreTableUtil.getMetadataTable(context, datasetManager);
                Stopwatch stopwatch = new Stopwatch().start();
                byte[] startRowKey = NEW_ROW_KEY_PREFIX;
                byte[] endRowKey = NEW_ROW_KEY_PREFIX_END;
                boolean reachedEnd = false;
                while (!reachedEnd) {
                    try (Scanner scanner = table.scan(startRowKey, endRowKey)) {
                        while (stopwatch.elapsedTime(TimeUnit.SECONDS) < cutOffTransactionTime) {
                            Row row = scanner.next();
                            if (row == null) {
                                // if row is null, the scanner is exhausted, so we have reached the end.
                                reachedEnd = true;
                                break;
                            }
                            byte[] rowkey = row.getRow();
                            // file creation time is the last 8-bytes in rowkey in the new format
                            long creationTime = Bytes.toLong(rowkey, rowkey.length - Bytes.SIZEOF_LONG, Bytes.SIZEOF_LONG);
                            if (creationTime <= tillTime) {
                                // expired - can be deleted
                                toDelete.add(new DeletedEntry(rowkey, Bytes.toString(row.get(LoggingStoreTableUtil.META_TABLE_COLUMN_KEY))));
                            } else {
                                // update start-row key based on the logging context and start a new scan.
                                startRowKey = Bytes.add(NEW_ROW_KEY_PREFIX, getNextContextStartKey(rowkey));
                                break;
                            }
                        }
                    }
                }
            }
        });
    } catch (TransactionFailureException e) {
        LOG.warn("Got Exception while scanning metadata table", e);
        // if there was an exception, no metadata was collected, so file deletion should be skipped.
        return new ArrayList<>();
    }
    if (!toDelete.isEmpty()) {
        // we delete old-format metadata whenever there are expired entries to delete in the new format,
        // though the first such call will already delete all of the old metadata.
        scanAndDeleteOldMetaData(transactionTimeout, cutOffTransactionTime);
        // delete meta data entries in toDelete and get the file location list
        return deleteNewMetadataEntries(toDelete, transactionTimeout, cutOffTransactionTime);
    }
    // toDelete is empty, safe to return that
    return toDelete;
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), Table (co.cask.cdap.api.dataset.table.Table), ArrayList (java.util.ArrayList), Stopwatch (com.google.common.base.Stopwatch), TransactionFailureException (org.apache.tephra.TransactionFailureException), TxRunnable (co.cask.cdap.api.TxRunnable), Row (co.cask.cdap.api.dataset.table.Row), DatasetContext (co.cask.cdap.api.data.DatasetContext)
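
A hypothetical usage sketch (the seven-day retention window and the 60-second transaction timeout are illustrative values, not defaults from the source):

long tillTime = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(7);
List<DeletedEntry> deleted = fileMetadataCleaner.scanAndGetFilesToDelete(tillTime, 60);
// per the constructor call above, each DeletedEntry carries the deleted metadata
// row key and the stored file location, which the caller can use to remove the files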

Aggregations

TxRunnable (co.cask.cdap.api.TxRunnable): 38 uses
DatasetContext (co.cask.cdap.api.data.DatasetContext): 37 uses
IOException (java.io.IOException): 18 uses
TransactionFailureException (org.apache.tephra.TransactionFailureException): 17 uses
TransactionConflictException (org.apache.tephra.TransactionConflictException): 11 uses
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 10 uses
TransactionControl (co.cask.cdap.api.annotation.TransactionControl): 8 uses
Table (co.cask.cdap.api.dataset.table.Table): 7 uses
ApplicationNotFoundException (co.cask.cdap.common.ApplicationNotFoundException): 6 uses
ProgramNotFoundException (co.cask.cdap.common.ProgramNotFoundException): 6 uses
NoSuchElementException (java.util.NoSuchElementException): 5 uses
AtomicReference (java.util.concurrent.atomic.AtomicReference): 5 uses
TransactionNotInProgressException (org.apache.tephra.TransactionNotInProgressException): 5 uses
ProgramLifecycle (co.cask.cdap.api.ProgramLifecycle): 4 uses
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 4 uses
Put (co.cask.cdap.api.dataset.table.Put): 3 uses
SparkExecutionPluginContext (co.cask.cdap.etl.api.batch.SparkExecutionPluginContext): 3 uses
ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification): 2 uses
DataSetException (co.cask.cdap.api.dataset.DataSetException): 2 uses
CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator): 2 uses