Search in sources :

Example 21 with CancelTaskException

use of org.apache.flink.runtime.execution.CancelTaskException in project flink by splunk.

the class DataSinkTask method invoke.

@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    // Initialize
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Start registering input and output"));
    // initialize OutputFormat
    initOutputFormat();
    // initialize input readers
    try {
        initInputReaders();
    } catch (Exception e) {
        throw new RuntimeException("Initializing the input streams failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
    }
    LOG.debug(getLogString("Finished registering input and output"));
    // --------------------------------------------------------------------
    // Invoke
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Starting data sink operator"));
    RuntimeContext ctx = createRuntimeContext();
    final Counter numRecordsIn;
    {
        Counter tmpNumRecordsIn;
        try {
            InternalOperatorIOMetricGroup ioMetricGroup = ((InternalOperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup();
            ioMetricGroup.reuseInputMetricsForTask();
            ioMetricGroup.reuseOutputMetricsForTask();
            tmpNumRecordsIn = ioMetricGroup.getNumRecordsInCounter();
        } catch (Exception e) {
            LOG.warn("An exception occurred during the metrics setup.", e);
            tmpNumRecordsIn = new SimpleCounter();
        }
        numRecordsIn = tmpNumRecordsIn;
    }
    if (RichOutputFormat.class.isAssignableFrom(this.format.getClass())) {
        ((RichOutputFormat) this.format).setRuntimeContext(ctx);
        LOG.debug(getLogString("Rich Sink detected. Initializing runtime context."));
    }
    ExecutionConfig executionConfig = getExecutionConfig();
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    try {
        // initialize local strategies
        MutableObjectIterator<IT> input1;
        switch(this.config.getInputLocalStrategy(0)) {
            case NONE:
                // nothing to do
                localStrategy = null;
                input1 = reader;
                break;
            case SORT:
                // initialize sort local strategy
                try {
                    // get type comparator
                    TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, getUserCodeClassLoader());
                    if (compFact == null) {
                        throw new Exception("Missing comparator factory for local strategy on input " + 0);
                    }
                    // initialize sorter
                    Sorter<IT> sorter = ExternalSorter.newBuilder(getEnvironment().getMemoryManager(), this, this.inputTypeSerializerFactory.getSerializer(), compFact.createComparator()).maxNumFileHandles(this.config.getFilehandlesInput(0)).enableSpilling(getEnvironment().getIOManager(), this.config.getSpillingThresholdInput(0)).memoryFraction(this.config.getRelativeMemoryInput(0)).objectReuse(this.getExecutionConfig().isObjectReuseEnabled()).largeRecords(this.config.getUseLargeRecordHandler()).build(this.reader);
                    this.localStrategy = sorter;
                    input1 = sorter.getIterator();
                } catch (Exception e) {
                    throw new RuntimeException("Initializing the input processing failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
                }
                break;
            default:
                throw new RuntimeException("Invalid local strategy for DataSinkTask");
        }
        // read the reader and write it to the output
        final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
        final MutableObjectIterator<IT> input = input1;
        final OutputFormat<IT> format = this.format;
        // check if task has been canceled
        if (this.taskCanceled) {
            return;
        }
        LOG.debug(getLogString("Starting to produce output"));
        // open
        format.open(this.getEnvironment().getTaskInfo().getIndexOfThisSubtask(), this.getEnvironment().getTaskInfo().getNumberOfParallelSubtasks());
        if (objectReuseEnabled) {
            IT record = serializer.createInstance();
            // work!
            while (!this.taskCanceled && ((record = input.next(record)) != null)) {
                numRecordsIn.inc();
                format.writeRecord(record);
            }
        } else {
            IT record;
            // work!
            while (!this.taskCanceled && ((record = input.next()) != null)) {
                numRecordsIn.inc();
                format.writeRecord(record);
            }
        }
        // failed.
        if (!this.taskCanceled) {
            this.format.close();
            this.format = null;
        }
    } catch (Exception ex) {
        // make a best effort to clean up
        try {
            if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
                cleanupCalled = true;
                ((CleanupWhenUnsuccessful) format).tryCleanupOnError();
            }
        } catch (Throwable t) {
            LOG.error("Cleanup on error failed.", t);
        }
        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        } else // drop, if the task was canceled
        if (!this.taskCanceled) {
            if (LOG.isErrorEnabled()) {
                LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
            }
            throw ex;
        }
    } finally {
        if (this.format != null) {
            // This should only be the case if we had a previous error, or were canceled.
            try {
                this.format.close();
            } catch (Throwable t) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(getLogString("Error closing the output format"), t);
                }
            }
        }
        // close local strategy if necessary
        if (localStrategy != null) {
            try {
                this.localStrategy.close();
            } catch (Throwable t) {
                LOG.error("Error closing local strategy", t);
            }
        }
        BatchTask.clearReaders(new MutableReader<?>[] { inputReader });
    }
    if (!this.taskCanceled) {
        LOG.debug(getLogString("Finished data sink operator"));
    } else {
        LOG.debug(getLogString("Data sink operator cancelled"));
    }
}
Also used : ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) InternalOperatorIOMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorIOMetricGroup) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) RichOutputFormat(org.apache.flink.api.common.io.RichOutputFormat) CleanupWhenUnsuccessful(org.apache.flink.api.common.io.CleanupWhenUnsuccessful)

Example 22 with CancelTaskException

use of org.apache.flink.runtime.execution.CancelTaskException in project flink by splunk.

the class DataSourceTask method invoke.

@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    // Initialize
    // --------------------------------------------------------------------
    initInputFormat();
    LOG.debug(getLogString("Start registering input and output"));
    try {
        initOutputs(getEnvironment().getUserCodeClassLoader());
    } catch (Exception ex) {
        throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + ex.getMessage(), ex);
    }
    LOG.debug(getLogString("Finished registering input and output"));
    // --------------------------------------------------------------------
    // Invoke
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Starting data source operator"));
    RuntimeContext ctx = createRuntimeContext();
    final Counter numRecordsOut;
    {
        Counter tmpNumRecordsOut;
        try {
            InternalOperatorIOMetricGroup ioMetricGroup = ((InternalOperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup();
            ioMetricGroup.reuseInputMetricsForTask();
            if (this.config.getNumberOfChainedStubs() == 0) {
                ioMetricGroup.reuseOutputMetricsForTask();
            }
            tmpNumRecordsOut = ioMetricGroup.getNumRecordsOutCounter();
        } catch (Exception e) {
            LOG.warn("An exception occurred during the metrics setup.", e);
            tmpNumRecordsOut = new SimpleCounter();
        }
        numRecordsOut = tmpNumRecordsOut;
    }
    Counter completedSplitsCounter = ctx.getMetricGroup().counter("numSplitsProcessed");
    if (RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
        ((RichInputFormat) this.format).setRuntimeContext(ctx);
        LOG.debug(getLogString("Rich Source detected. Initializing runtime context."));
        ((RichInputFormat) this.format).openInputFormat();
        LOG.debug(getLogString("Rich Source detected. Opening the InputFormat."));
    }
    ExecutionConfig executionConfig = getExecutionConfig();
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    LOG.debug("DataSourceTask object reuse: " + (objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    final TypeSerializer<OT> serializer = this.serializerFactory.getSerializer();
    try {
        // start all chained tasks
        BatchTask.openChainedTasks(this.chainedTasks, this);
        // get input splits to read
        final Iterator<InputSplit> splitIterator = getInputSplits();
        // for each assigned input split
        while (!this.taskCanceled && splitIterator.hasNext()) {
            // get start and end
            final InputSplit split = splitIterator.next();
            LOG.debug(getLogString("Opening input split " + split.toString()));
            final InputFormat<OT, InputSplit> format = this.format;
            // open input format
            format.open(split);
            LOG.debug(getLogString("Starting to read input from split " + split.toString()));
            try {
                final Collector<OT> output = new CountingCollector<>(this.output, numRecordsOut);
                if (objectReuseEnabled) {
                    OT reuse = serializer.createInstance();
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        if ((returned = format.nextRecord(reuse)) != null) {
                            output.collect(returned);
                        }
                    }
                } else {
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        if ((returned = format.nextRecord(serializer.createInstance())) != null) {
                            output.collect(returned);
                        }
                    }
                }
                if (LOG.isDebugEnabled() && !this.taskCanceled) {
                    LOG.debug(getLogString("Closing input split " + split.toString()));
                }
            } finally {
                // close. We close here such that a regular close throwing an exception marks a
                // task as failed.
                format.close();
            }
            completedSplitsCounter.inc();
        }
        // end for all input splits
        // close all chained tasks letting them report failure
        BatchTask.closeChainedTasks(this.chainedTasks, this);
        // close the output collector
        this.output.close();
    } catch (Exception ex) {
        // cause
        try {
            this.format.close();
        } catch (Throwable ignored) {
        }
        BatchTask.cancelChainedTasks(this.chainedTasks);
        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        } else if (!this.taskCanceled) {
            // drop exception, if the task was canceled
            BatchTask.logAndThrowException(ex, this);
        }
    } finally {
        BatchTask.clearWriters(eventualOutputs);
        // --------------------------------------------------------------------
        if (this.format != null && RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
            ((RichInputFormat) this.format).closeInputFormat();
            LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
        }
    }
    if (!this.taskCanceled) {
        LOG.debug(getLogString("Finished data source operator"));
    } else {
        LOG.debug(getLogString("Data source operator cancelled"));
    }
}
Also used : RichInputFormat(org.apache.flink.api.common.io.RichInputFormat) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) NoSuchElementException(java.util.NoSuchElementException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) InputSplitProviderException(org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException) CountingCollector(org.apache.flink.runtime.operators.util.metrics.CountingCollector) InternalOperatorIOMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorIOMetricGroup) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) InputSplit(org.apache.flink.core.io.InputSplit)

Example 23 with CancelTaskException

use of org.apache.flink.runtime.execution.CancelTaskException in project flink by splunk.

the class BufferManager method shouldContinueRequest.

private boolean shouldContinueRequest(BufferPool bufferPool) {
    if (bufferPool.addBufferListener(this)) {
        isWaitingForFloatingBuffers = true;
        numRequiredBuffers = 1;
        return false;
    } else if (bufferPool.isDestroyed()) {
        throw new CancelTaskException("Local buffer pool has already been released.");
    } else {
        return true;
    }
}
Also used : CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException)

Example 24 with CancelTaskException

use of org.apache.flink.runtime.execution.CancelTaskException in project flink by apache.

the class StreamTask method invoke.

@Override
public final void invoke() throws Exception {
    boolean disposed = false;
    try {
        // -------- Initialize ---------
        LOG.debug("Initializing {}.", getName());
        asyncOperationsThreadPool = Executors.newCachedThreadPool();
        configuration = new StreamConfig(getTaskConfiguration());
        stateBackend = createStateBackend();
        accumulatorMap = getEnvironment().getAccumulatorRegistry().getUserMap();
        // if the clock is not already set, then assign a default TimeServiceProvider
        if (timerService == null) {
            ThreadFactory timerThreadFactory = new DispatcherThreadFactory(TRIGGER_THREAD_GROUP, "Time Trigger for " + getName());
            timerService = new SystemProcessingTimeService(this, getCheckpointLock(), timerThreadFactory);
        }
        operatorChain = new OperatorChain<>(this);
        headOperator = operatorChain.getHeadOperator();
        // task specific initialization
        init();
        // save the work of reloading state, etc, if the task is already canceled
        if (canceled) {
            throw new CancelTaskException();
        }
        // -------- Invoke --------
        LOG.debug("Invoking {}", getName());
        // executed before all operators are opened
        synchronized (lock) {
            // both the following operations are protected by the lock
            // so that we avoid race conditions in the case that initializeState()
            // registers a timer, that fires before the open() is called.
            initializeState();
            openAllOperators();
        }
        // final check to exit early before starting to run
        if (canceled) {
            throw new CancelTaskException();
        }
        // let the task do its work
        isRunning = true;
        run();
        // make sure the "clean shutdown" is not attempted
        if (canceled) {
            throw new CancelTaskException();
        }
        // make sure all timers finish and no new timers can come
        timerService.quiesceAndAwaitPending();
        LOG.debug("Finished task {}", getName());
        // at the same time, this makes sure that during any "regular" exit where still
        synchronized (lock) {
            isRunning = false;
            // this is part of the main logic, so if this fails, the task is considered failed
            closeAllOperators();
        }
        LOG.debug("Closed operators for task {}", getName());
        // make sure all buffered data is flushed
        operatorChain.flushOutputs();
        // make an attempt to dispose the operators such that failures in the dispose call
        // still let the computation fail
        tryDisposeAllOperators();
        disposed = true;
    } finally {
        // clean up everything we initialized
        isRunning = false;
        // stop all timers and threads
        if (timerService != null) {
            try {
                timerService.shutdownService();
            } catch (Throwable t) {
                // catch and log the exception to not replace the original exception
                LOG.error("Could not shut down timer service", t);
            }
        }
        // stop all asynchronous checkpoint threads
        try {
            cancelables.close();
            shutdownAsyncThreads();
        } catch (Throwable t) {
            // catch and log the exception to not replace the original exception
            LOG.error("Could not shut down async checkpoint threads", t);
        }
        // we must! perform this cleanup
        try {
            cleanup();
        } catch (Throwable t) {
            // catch and log the exception to not replace the original exception
            LOG.error("Error during cleanup of stream task", t);
        }
        // if the operators were not disposed before, do a hard dispose
        if (!disposed) {
            disposeAllOperators();
        }
        // release the output resources. this method should never fail.
        if (operatorChain != null) {
            operatorChain.releaseOutputs();
        }
    }
}
Also used : ThreadFactory(java.util.concurrent.ThreadFactory) DispatcherThreadFactory(org.apache.flink.runtime.taskmanager.DispatcherThreadFactory) DispatcherThreadFactory(org.apache.flink.runtime.taskmanager.DispatcherThreadFactory) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig)

Example 25 with CancelTaskException

use of org.apache.flink.runtime.execution.CancelTaskException in project flink by apache.

the class Task method run.

/**
	 * The core work method that bootstraps the task and executes it code
	 */
@Override
public void run() {
    // ----------------------------
    while (true) {
        ExecutionState current = this.executionState;
        if (current == ExecutionState.CREATED) {
            if (transitionState(ExecutionState.CREATED, ExecutionState.DEPLOYING)) {
                // success, we can start our work
                break;
            }
        } else if (current == ExecutionState.FAILED) {
            // we were immediately failed. tell the TaskManager that we reached our final state
            notifyFinalState();
            return;
        } else if (current == ExecutionState.CANCELING) {
            if (transitionState(ExecutionState.CANCELING, ExecutionState.CANCELED)) {
                // we were immediately canceled. tell the TaskManager that we reached our final state
                notifyFinalState();
                return;
            }
        } else {
            throw new IllegalStateException("Invalid state for beginning of operation of task " + this + '.');
        }
    }
    // all resource acquisitions and registrations from here on
    // need to be undone in the end
    Map<String, Future<Path>> distributedCacheEntries = new HashMap<String, Future<Path>>();
    AbstractInvokable invokable = null;
    ClassLoader userCodeClassLoader;
    try {
        // ----------------------------
        //  Task Bootstrap - We periodically
        //  check for canceling as a shortcut
        // ----------------------------
        // activate safety net for task thread
        LOG.info("Creating FileSystem stream leak safety net for task {}", this);
        FileSystemSafetyNet.initializeSafetyNetForThread();
        // first of all, get a user-code classloader
        // this may involve downloading the job's JAR files and/or classes
        LOG.info("Loading JAR files for task {}.", this);
        userCodeClassLoader = createUserCodeClassloader(libraryCache);
        final ExecutionConfig executionConfig = serializedExecutionConfig.deserializeValue(userCodeClassLoader);
        if (executionConfig.getTaskCancellationInterval() >= 0) {
            // override task cancellation interval from Flink config if set in ExecutionConfig
            taskCancellationInterval = executionConfig.getTaskCancellationInterval();
        }
        if (executionConfig.getTaskCancellationTimeout() >= 0) {
            // override task cancellation timeout from Flink config if set in ExecutionConfig
            taskCancellationTimeout = executionConfig.getTaskCancellationTimeout();
        }
        // now load the task's invokable code
        invokable = loadAndInstantiateInvokable(userCodeClassLoader, nameOfInvokableClass);
        if (isCanceledOrFailed()) {
            throw new CancelTaskException();
        }
        // ----------------------------------------------------------------
        // register the task with the network stack
        // this operation may fail if the system does not have enough
        // memory to run the necessary data exchanges
        // the registration must also strictly be undone
        // ----------------------------------------------------------------
        LOG.info("Registering task at network: {}.", this);
        network.registerTask(this);
        // next, kick off the background copying of files for the distributed cache
        try {
            for (Map.Entry<String, DistributedCache.DistributedCacheEntry> entry : DistributedCache.readFileInfoFromConfig(jobConfiguration)) {
                LOG.info("Obtaining local cache file for '{}'.", entry.getKey());
                Future<Path> cp = fileCache.createTmpFile(entry.getKey(), entry.getValue(), jobId);
                distributedCacheEntries.put(entry.getKey(), cp);
            }
        } catch (Exception e) {
            throw new Exception(String.format("Exception while adding files to distributed cache of task %s (%s).", taskNameWithSubtask, executionId), e);
        }
        if (isCanceledOrFailed()) {
            throw new CancelTaskException();
        }
        // ----------------------------------------------------------------
        //  call the user code initialization methods
        // ----------------------------------------------------------------
        TaskKvStateRegistry kvStateRegistry = network.createKvStateTaskRegistry(jobId, getJobVertexId());
        Environment env = new RuntimeEnvironment(jobId, vertexId, executionId, executionConfig, taskInfo, jobConfiguration, taskConfiguration, userCodeClassLoader, memoryManager, ioManager, broadcastVariableManager, accumulatorRegistry, kvStateRegistry, inputSplitProvider, distributedCacheEntries, writers, inputGates, checkpointResponder, taskManagerConfig, metrics, this);
        // let the task code create its readers and writers
        invokable.setEnvironment(env);
        if (null != taskStateHandles) {
            if (invokable instanceof StatefulTask) {
                StatefulTask op = (StatefulTask) invokable;
                op.setInitialState(taskStateHandles);
            } else {
                throw new IllegalStateException("Found operator state for a non-stateful task invokable");
            }
            // be memory and GC friendly - since the code stays in invoke() for a potentially long time,
            // we clear the reference to the state handle
            //noinspection UnusedAssignment
            taskStateHandles = null;
        }
        // ----------------------------------------------------------------
        //  actual task core work
        // ----------------------------------------------------------------
        // we must make strictly sure that the invokable is accessible to the cancel() call
        // by the time we switched to running.
        this.invokable = invokable;
        // switch to the RUNNING state, if that fails, we have been canceled/failed in the meantime
        if (!transitionState(ExecutionState.DEPLOYING, ExecutionState.RUNNING)) {
            throw new CancelTaskException();
        }
        // notify everyone that we switched to running
        notifyObservers(ExecutionState.RUNNING, null);
        taskManagerActions.updateTaskExecutionState(new TaskExecutionState(jobId, executionId, ExecutionState.RUNNING));
        // make sure the user code classloader is accessible thread-locally
        executingThread.setContextClassLoader(userCodeClassLoader);
        // run the invokable
        invokable.invoke();
        // to the fact that it has been canceled
        if (isCanceledOrFailed()) {
            throw new CancelTaskException();
        }
        // finish the produced partitions. if this fails, we consider the execution failed.
        for (ResultPartition partition : producedPartitions) {
            if (partition != null) {
                partition.finish();
            }
        }
        // if that fails, the task was canceled/failed in the meantime
        if (transitionState(ExecutionState.RUNNING, ExecutionState.FINISHED)) {
            notifyObservers(ExecutionState.FINISHED, null);
        } else {
            throw new CancelTaskException();
        }
    } catch (Throwable t) {
        try {
            // check if the exception is unrecoverable
            if (ExceptionUtils.isJvmFatalError(t) || (t instanceof OutOfMemoryError && taskManagerConfig.shouldExitJvmOnOutOfMemoryError())) {
                // don't attempt a clean shutdown, because we cannot expect the clean shutdown to complete
                try {
                    LOG.error("Encountered fatal error {} - terminating the JVM", t.getClass().getName(), t);
                } finally {
                    Runtime.getRuntime().halt(-1);
                }
            }
            // to failExternally()
            while (true) {
                ExecutionState current = this.executionState;
                if (current == ExecutionState.RUNNING || current == ExecutionState.DEPLOYING) {
                    if (t instanceof CancelTaskException) {
                        if (transitionState(current, ExecutionState.CANCELED)) {
                            cancelInvokable();
                            notifyObservers(ExecutionState.CANCELED, null);
                            break;
                        }
                    } else {
                        if (transitionState(current, ExecutionState.FAILED, t)) {
                            // proper failure of the task. record the exception as the root cause
                            String errorMessage = String.format("Execution of %s (%s) failed.", taskNameWithSubtask, executionId);
                            failureCause = t;
                            cancelInvokable();
                            notifyObservers(ExecutionState.FAILED, new Exception(errorMessage, t));
                            break;
                        }
                    }
                } else if (current == ExecutionState.CANCELING) {
                    if (transitionState(current, ExecutionState.CANCELED)) {
                        notifyObservers(ExecutionState.CANCELED, null);
                        break;
                    }
                } else if (current == ExecutionState.FAILED) {
                    // in state failed already, no transition necessary any more
                    break;
                } else // unexpected state, go to failed
                if (transitionState(current, ExecutionState.FAILED, t)) {
                    LOG.error("Unexpected state in task {} ({}) during an exception: {}.", taskNameWithSubtask, executionId, current);
                    break;
                }
            // else fall through the loop and
            }
        } catch (Throwable tt) {
            String message = String.format("FATAL - exception in exception handler of task %s (%s).", taskNameWithSubtask, executionId);
            LOG.error(message, tt);
            notifyFatalError(message, tt);
        }
    } finally {
        try {
            LOG.info("Freeing task resources for {} ({}).", taskNameWithSubtask, executionId);
            // stop the async dispatcher.
            // copy dispatcher reference to stack, against concurrent release
            ExecutorService dispatcher = this.asyncCallDispatcher;
            if (dispatcher != null && !dispatcher.isShutdown()) {
                dispatcher.shutdownNow();
            }
            // free the network resources
            network.unregisterTask(this);
            // free memory resources
            if (invokable != null) {
                memoryManager.releaseAll(invokable);
            }
            // remove all of the tasks library resources
            libraryCache.unregisterTask(jobId, executionId);
            // remove all files in the distributed cache
            removeCachedFiles(distributedCacheEntries, fileCache);
            // close and de-activate safety net for task thread
            LOG.info("Ensuring all FileSystem streams are closed for task {}", this);
            FileSystemSafetyNet.closeSafetyNetAndGuardedResourcesForThread();
            notifyFinalState();
        } catch (Throwable t) {
            // an error in the resource cleanup is fatal
            String message = String.format("FATAL - exception in resource cleanup of task %s (%s).", taskNameWithSubtask, executionId);
            LOG.error(message, t);
            notifyFatalError(message, t);
        }
        // errors here will only be logged
        try {
            metrics.close();
        } catch (Throwable t) {
            LOG.error("Error during metrics de-registration of task {} ({}).", taskNameWithSubtask, executionId, t);
        }
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) HashMap(java.util.HashMap) TaskKvStateRegistry(org.apache.flink.runtime.query.TaskKvStateRegistry) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) Path(org.apache.flink.core.fs.Path) CheckpointDeclineTaskNotCheckpointingException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineTaskNotCheckpointingException) TimeoutException(java.util.concurrent.TimeoutException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) CheckpointDeclineTaskNotReadyException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineTaskNotReadyException) IOException(java.io.IOException) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) Environment(org.apache.flink.runtime.execution.Environment) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)43 IOException (java.io.IOException)16 Test (org.junit.Test)16 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)10 ResultSubpartitionView (org.apache.flink.runtime.io.network.partition.ResultSubpartitionView)10 ExceptionInChainedStubException (org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException)9 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)7 Buffer (org.apache.flink.runtime.io.network.buffer.Buffer)6 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)5 ExecutorService (java.util.concurrent.ExecutorService)4 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)4 Counter (org.apache.flink.metrics.Counter)4 SimpleCounter (org.apache.flink.metrics.SimpleCounter)4 InputChannelTestUtils.createLocalInputChannel (org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel)4 TestingResultPartitionManager (org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager)4 InternalOperatorIOMetricGroup (org.apache.flink.runtime.metrics.groups.InternalOperatorIOMetricGroup)4 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)4 FlinkException (org.apache.flink.util.FlinkException)4 WrappingRuntimeException (org.apache.flink.util.WrappingRuntimeException)4 File (java.io.File)3