use of org.apache.flink.util.FlinkException in project flink by apache.
the class FutureUtilsTest method testHandleUncaughtExceptionWithExceptionallyCompletion.
@Test
public void testHandleUncaughtExceptionWithExceptionallyCompletion() {
    final CompletableFuture<String> future = new CompletableFuture<>();
    final TestingUncaughtExceptionHandler uncaughtExceptionHandler =
            new TestingUncaughtExceptionHandler();

    FutureUtils.handleUncaughtException(future, uncaughtExceptionHandler);

    assertThat(uncaughtExceptionHandler.hasBeenCalled(), is(false));

    future.completeExceptionally(new FlinkException("barfoo"));

    assertThat(uncaughtExceptionHandler.hasBeenCalled(), is(true));
}
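The TestingUncaughtExceptionHandler used by this test is a Flink test utility that simply records whether it was invoked. A minimal sketch of such a recording handler, assuming FutureUtils.handleUncaughtException accepts a java.lang.Thread.UncaughtExceptionHandler, could look like the following; the field name and threading details are illustrative, not the actual Flink class.

// Illustrative sketch of a recording handler; not the actual Flink test utility.
public class TestingUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {

    private volatile Throwable lastUncaughtException;

    @Override
    public void uncaughtException(Thread thread, Throwable throwable) {
        // remember the reported exception so the test can assert on it
        lastUncaughtException = throwable;
    }

    public boolean hasBeenCalled() {
        return lastUncaughtException != null;
    }
}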
use of org.apache.flink.util.FlinkException in project flink by apache.
the class DefaultExecutionGraph method cancel.
@Override
public void cancel() {
    assertRunningInJobMasterMainThread();

    while (true) {
        JobStatus current = state;

        if (current == JobStatus.RUNNING
                || current == JobStatus.CREATED
                || current == JobStatus.RESTARTING) {
            if (transitionState(current, JobStatus.CANCELLING)) {

                incrementRestarts();

                final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

                // cancel ongoing scheduling action
                if (ongoingSchedulingFuture != null) {
                    ongoingSchedulingFuture.cancel(false);
                }

                final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
                allTerminal.whenComplete(
                        (Void value, Throwable throwable) -> {
                            if (throwable != null) {
                                transitionState(
                                        JobStatus.CANCELLING,
                                        JobStatus.FAILED,
                                        new FlinkException(
                                                "Could not cancel job "
                                                        + getJobName()
                                                        + " because not all execution job vertices could be cancelled.",
                                                throwable));
                            } else {
                                // cancellations may currently be overridden by failures which
                                // trigger restarts, so we need to pass a proper restart global
                                // version here
                                allVerticesInTerminalState();
                            }
                        });

                return;
            }
        }
        // Executions are being canceled. Go into cancelling and wait for
        // all vertices to be in their final state.
        else if (current == JobStatus.FAILING) {
            if (transitionState(current, JobStatus.CANCELLING)) {
                return;
            }
        } else {
            // no need to treat other states
            return;
        }
    }
}
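The retry loop above relies on transitionState behaving like a compare-and-set on the job status: a transition succeeds only if the state has not changed since it was read, otherwise the loop re-reads the state and tries again. A generic, self-contained sketch of that pattern (not Flink's actual implementation, which additionally records timestamps and notifies listeners) might look like this:

import java.util.concurrent.atomic.AtomicReference;

// Generic sketch of a compare-and-set style state transition helper.
class StateTransitions<S extends Enum<S>> {

    private final AtomicReference<S> state;

    StateTransitions(S initialState) {
        this.state = new AtomicReference<>(initialState);
    }

    /** Succeeds only if the current state is still {@code expected}. */
    boolean transitionState(S expected, S target) {
        return state.compareAndSet(expected, target);
    }

    S currentState() {
        return state.get();
    }
}

Callers read the current state, pick a target state, and retry until a transition succeeds or they decide no transition is needed, which is exactly the shape of the while (true) loop in cancel().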
use of org.apache.flink.util.FlinkException in project flink by apache.
the class DefaultDispatcherResourceManagerComponentFactory method create.
@Override
public DispatcherResourceManagerComponent create(
        Configuration configuration,
        ResourceID resourceId,
        Executor ioExecutor,
        RpcService rpcService,
        HighAvailabilityServices highAvailabilityServices,
        BlobServer blobServer,
        HeartbeatServices heartbeatServices,
        MetricRegistry metricRegistry,
        ExecutionGraphInfoStore executionGraphInfoStore,
        MetricQueryServiceRetriever metricQueryServiceRetriever,
        FatalErrorHandler fatalErrorHandler)
        throws Exception {
    LeaderRetrievalService dispatcherLeaderRetrievalService = null;
    LeaderRetrievalService resourceManagerRetrievalService = null;
    WebMonitorEndpoint<?> webMonitorEndpoint = null;
    ResourceManagerService resourceManagerService = null;
    DispatcherRunner dispatcherRunner = null;

    try {
        dispatcherLeaderRetrievalService = highAvailabilityServices.getDispatcherLeaderRetriever();
        resourceManagerRetrievalService = highAvailabilityServices.getResourceManagerLeaderRetriever();

        final LeaderGatewayRetriever<DispatcherGateway> dispatcherGatewayRetriever =
                new RpcGatewayRetriever<>(
                        rpcService, DispatcherGateway.class, DispatcherId::fromUuid,
                        new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));
        final LeaderGatewayRetriever<ResourceManagerGateway> resourceManagerGatewayRetriever =
                new RpcGatewayRetriever<>(
                        rpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid,
                        new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));

        final ScheduledExecutorService executor =
                WebMonitorEndpoint.createExecutorService(
                        configuration.getInteger(RestOptions.SERVER_NUM_THREADS),
                        configuration.getInteger(RestOptions.SERVER_THREAD_PRIORITY),
                        "DispatcherRestEndpoint");

        final long updateInterval = configuration.getLong(MetricOptions.METRIC_FETCHER_UPDATE_INTERVAL);
        final MetricFetcher metricFetcher =
                updateInterval == 0
                        ? VoidMetricFetcher.INSTANCE
                        : MetricFetcherImpl.fromConfiguration(
                                configuration, metricQueryServiceRetriever, dispatcherGatewayRetriever, executor);

        webMonitorEndpoint =
                restEndpointFactory.createRestEndpoint(
                        configuration, dispatcherGatewayRetriever, resourceManagerGatewayRetriever,
                        blobServer, executor, metricFetcher,
                        highAvailabilityServices.getClusterRestEndpointLeaderElectionService(),
                        fatalErrorHandler);

        log.debug("Starting Dispatcher REST endpoint.");
        webMonitorEndpoint.start();

        final String hostname = RpcUtils.getHostname(rpcService);

        resourceManagerService =
                ResourceManagerServiceImpl.create(
                        resourceManagerFactory, configuration, resourceId, rpcService,
                        highAvailabilityServices, heartbeatServices, fatalErrorHandler,
                        new ClusterInformation(hostname, blobServer.getPort()),
                        webMonitorEndpoint.getRestBaseUrl(), metricRegistry, hostname, ioExecutor);

        final HistoryServerArchivist historyServerArchivist =
                HistoryServerArchivist.createHistoryServerArchivist(configuration, webMonitorEndpoint, ioExecutor);

        final DispatcherOperationCaches dispatcherOperationCaches =
                new DispatcherOperationCaches(configuration.get(RestOptions.ASYNC_OPERATION_STORE_DURATION));

        final PartialDispatcherServices partialDispatcherServices =
                new PartialDispatcherServices(
                        configuration, highAvailabilityServices, resourceManagerGatewayRetriever,
                        blobServer, heartbeatServices,
                        () -> JobManagerMetricGroup.createJobManagerMetricGroup(metricRegistry, hostname),
                        executionGraphInfoStore, fatalErrorHandler, historyServerArchivist,
                        metricRegistry.getMetricQueryServiceGatewayRpcAddress(),
                        ioExecutor, dispatcherOperationCaches);

        log.debug("Starting Dispatcher.");
        dispatcherRunner =
                dispatcherRunnerFactory.createDispatcherRunner(
                        highAvailabilityServices.getDispatcherLeaderElectionService(),
                        fatalErrorHandler,
                        new HaServicesJobPersistenceComponentFactory(highAvailabilityServices),
                        ioExecutor, rpcService, partialDispatcherServices);

        log.debug("Starting ResourceManagerService.");
        resourceManagerService.start();

        resourceManagerRetrievalService.start(resourceManagerGatewayRetriever);
        dispatcherLeaderRetrievalService.start(dispatcherGatewayRetriever);

        return new DispatcherResourceManagerComponent(
                dispatcherRunner, resourceManagerService, dispatcherLeaderRetrievalService,
                resourceManagerRetrievalService, webMonitorEndpoint, fatalErrorHandler,
                dispatcherOperationCaches);
    } catch (Exception exception) {
        // clean up all started components
        if (dispatcherLeaderRetrievalService != null) {
            try {
                dispatcherLeaderRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }

        if (resourceManagerRetrievalService != null) {
            try {
                resourceManagerRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }

        final Collection<CompletableFuture<Void>> terminationFutures = new ArrayList<>(3);

        if (webMonitorEndpoint != null) {
            terminationFutures.add(webMonitorEndpoint.closeAsync());
        }

        if (resourceManagerService != null) {
            terminationFutures.add(resourceManagerService.closeAsync());
        }

        if (dispatcherRunner != null) {
            terminationFutures.add(dispatcherRunner.closeAsync());
        }

        final FutureUtils.ConjunctFuture<Void> terminationFuture =
                FutureUtils.completeAll(terminationFutures);

        try {
            terminationFuture.get();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        throw new FlinkException(
                "Could not create the DispatcherResourceManagerComponent.", exception);
    }
}
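The catch block follows a recurring cleanup pattern: keep the first exception as the primary failure, attach every later cleanup failure to it as a suppressed exception, wait for all asynchronous shutdowns to finish, and only then rethrow. A plain-JDK sketch of that pattern, independent of Flink's ExceptionUtils and FutureUtils (the class and method names below are illustrative), might look like this:

import java.util.Collection;
import java.util.concurrent.CompletableFuture;

// Illustrative sketch of the "first or suppressed" cleanup aggregation pattern.
final class CleanupSketch {

    /** Keeps the previous exception as primary and attaches the new one as suppressed. */
    static Exception firstOrSuppressed(Exception newException, Exception previous) {
        if (previous == null) {
            return newException;
        }
        previous.addSuppressed(newException);
        return previous;
    }

    /** Waits for all shutdown futures, folds any failure into the cause, then rethrows. */
    static void failAfterCleanup(Exception cause, Collection<CompletableFuture<Void>> terminations)
            throws Exception {
        Exception exception = cause;
        try {
            // allOf completes only after every future has completed, even if some of them fail
            CompletableFuture.allOf(terminations.toArray(new CompletableFuture[0])).get();
        } catch (Exception e) {
            exception = firstOrSuppressed(e, exception);
        }
        throw new Exception("Could not create the component.", exception);
    }

    private CleanupSketch() {}
}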
use of org.apache.flink.util.FlinkException in project flink by apache.
the class Execution method finishCancellation.
private void finishCancellation(boolean releasePartitions) {
    releaseAssignedResource(new FlinkException("Execution " + this + " was cancelled."));
    vertex.getExecutionGraphAccessor().deregisterExecution(this);
    handlePartitionCleanup(releasePartitions, releasePartitions);
}
use of org.apache.flink.util.FlinkException in project flink by apache.
the class JobDispatcherLeaderProcessFactoryFactory method createFactory.
@Override
public JobDispatcherLeaderProcessFactory createFactory(
        JobPersistenceComponentFactory jobPersistenceComponentFactory,
        Executor ioExecutor,
        RpcService rpcService,
        PartialDispatcherServices partialDispatcherServices,
        FatalErrorHandler fatalErrorHandler) {

    final JobGraph jobGraph;
    try {
        jobGraph =
                Preconditions.checkNotNull(
                        jobGraphRetriever.retrieveJobGraph(partialDispatcherServices.getConfiguration()));
    } catch (FlinkException e) {
        throw new FlinkRuntimeException("Could not retrieve the JobGraph.", e);
    }

    final JobResultStore jobResultStore = jobPersistenceComponentFactory.createJobResultStore();
    final Collection<JobResult> recoveredDirtyJobResults = getDirtyJobResults(jobResultStore);

    final Optional<JobResult> maybeRecoveredDirtyJobResult =
            extractDirtyJobResult(recoveredDirtyJobResults, jobGraph);
    final Optional<JobGraph> maybeJobGraph =
            getJobGraphBasedOnDirtyJobResults(jobGraph, recoveredDirtyJobResults);

    final DefaultDispatcherGatewayServiceFactory defaultDispatcherServiceFactory =
            new DefaultDispatcherGatewayServiceFactory(
                    JobDispatcherFactory.INSTANCE, rpcService, partialDispatcherServices);

    return new JobDispatcherLeaderProcessFactory(
            defaultDispatcherServiceFactory,
            maybeJobGraph.orElse(null),
            maybeRecoveredDirtyJobResult.orElse(null),
            jobResultStore,
            fatalErrorHandler);
}
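The helpers getDirtyJobResults, extractDirtyJobResult, and getJobGraphBasedOnDirtyJobResults are private to the factory and not shown in this snippet. A plausible sketch of the latter two, inferred purely from the call sites above (the actual Flink implementations may differ), is:

// Sketch inferred from the call sites above; not the actual Flink helpers.
private static Optional<JobResult> extractDirtyJobResult(
        Collection<JobResult> dirtyJobResults, JobGraph jobGraph) {
    // a dirty result is only relevant if it belongs to the job that is about to run
    return dirtyJobResults.stream()
            .filter(jobResult -> jobResult.getJobId().equals(jobGraph.getJobID()))
            .findFirst();
}

private static Optional<JobGraph> getJobGraphBasedOnDirtyJobResults(
        JobGraph jobGraph, Collection<JobResult> dirtyJobResults) {
    // if the job already has a dirty result, it must not be submitted again;
    // only its cleanup still has to run, so no JobGraph is handed to the process factory
    if (extractDirtyJobResult(dirtyJobResults, jobGraph).isPresent()) {
        return Optional.empty();
    }
    return Optional.of(jobGraph);
}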