Use of org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException in the Apache Flink project.
Class PhysicalSlotRequestBulkCheckerImpl, method schedulePendingRequestBulkWithTimestampCheck.
/**
 * Schedules a delayed timeout check for the given slot request bulk on the
 * component main thread executor.
 *
 * <p>When the check runs, a bulk that is still pending gets another check scheduled,
 * a bulk that timed out is cancelled with a {@link NoResourceAvailableException}
 * (caused by a {@code TimeoutException}), and any other result requires no action.
 *
 * @param bulk the slot request bulk whose timeout is checked
 * @param timeout the delay before the check runs; also handed to the timeout check itself
 */
private void schedulePendingRequestBulkWithTimestampCheck(
        final PhysicalSlotRequestBulkWithTimestamp bulk, final Time timeout) {
    final Runnable timeoutCheck = () -> {
        final TimeoutCheckResult outcome = checkPhysicalSlotRequestBulkTimeout(bulk, timeout);
        switch (outcome) {
            case TIMEOUT:
                // give up on the bulk and report why the slots could not be provided
                final TimeoutException timeoutCause =
                        new TimeoutException("Timeout has occurred: " + timeout);
                final Throwable reason = new NoResourceAvailableException(
                        "Slot request bulk is not fulfillable! Could not allocate the required slot within slot request timeout",
                        timeoutCause);
                bulk.cancel(reason);
                break;
            case PENDING:
                // not decided yet: look at the bulk again after another timeout period
                schedulePendingRequestBulkWithTimestampCheck(bulk, timeout);
                break;
            case FULFILLED:
            default:
                // nothing to do
                break;
        }
    };
    componentMainThreadExecutor.schedule(timeoutCheck, timeout.getSize(), timeout.getUnit());
}
Use of org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException in the Apache Flink project.
Class PipelinedRegionSchedulingITCase, method testFailsOnInsufficientSlots.
/**
 * Runs the scheduling test with an argument of 1 and expects the job to fail with a
 * {@link NoResourceAvailableException} whose message reports that the slot request
 * bulk is not fulfillable.
 */
@Test
public void testFailsOnInsufficientSlots() throws Exception {
    final JobResult result = executeSchedulingTest(1);

    // the job must have terminated with a failure
    assertThat(result.getSerializedThrowable().isPresent(), is(true));

    final Throwable failure =
            result.getSerializedThrowable()
                    .get()
                    .deserializeError(ClassLoader.getSystemClassLoader());

    // the failure chain must contain the expected resource exception
    final Optional<NoResourceAvailableException> noResourceCause =
            ExceptionUtils.findThrowable(failure, NoResourceAvailableException.class);
    assertThat(noResourceCause.isPresent(), is(true));

    final String failureMessage = noResourceCause.get().getMessage();
    assertThat(failureMessage, containsString("Slot request bulk is not fulfillable!"));
}
Use of org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException in the Apache Flink project.
Class ExecutionGraph, method scheduleEager.
/**
 * Requests slots for all executions of all job vertices up front and deploys every
 * execution once all slot futures have completed.
 *
 * <p>If any slot future fails, or if the allocation is not complete when the timeout
 * fires, the combined future is completed exceptionally (with a
 * {@link NoResourceAvailableException} in the timeout case), {@code fail(...)} is called
 * and all slots collected so far are released. If this method itself exits with an
 * exception before the completion handler is registered, the collected slots are
 * released as well and the exception propagates to the caller.
 *
 * @param slotProvider The resource provider from which the slots are allocated
 * @param timeout The maximum time that the slot allocation may take before the
 *                allocation is failed with a {@link NoResourceAvailableException}.
 * @throws IllegalStateException if the job is not in state {@code RUNNING}
 *                               (presumably raised by {@code checkState})
 */
private void scheduleEager(SlotProvider slotProvider, final Time timeout) {
    checkState(state == JobStatus.RUNNING, "job is not running currently");

    // Important: reserve all the space we need up front.
    // that way we do not have any operation that can fail between allocating the slots
    // and adding them to the list. If we had a failure in between there, that would
    // cause the slots to get lost
    final ArrayList<ExecutionAndSlot[]> resources = new ArrayList<>(getNumberOfExecutionJobVertices());
    final boolean queued = allowQueuedScheduling;

    // we use this flag to handle failures in a 'finally' clause
    // that allows us to not go through clumsy cast-and-rethrow logic
    boolean successful = false;

    try {
        // collecting all the slots may resize and fail in that operation without slots getting lost
        final ArrayList<Future<SimpleSlot>> slotFutures = new ArrayList<>(getNumberOfExecutionJobVertices());

        // allocate the slots (obtain all their futures)
        for (ExecutionJobVertex ejv : getVerticesTopologically()) {
            // these calls are not blocking, they only return futures
            ExecutionAndSlot[] slots = ejv.allocateResourcesForAll(slotProvider, queued);

            // we need to first add the slots to this list, to be safe on release
            resources.add(slots);

            for (ExecutionAndSlot ens : slots) {
                slotFutures.add(ens.slotFuture);
            }
        }

        // this future is complete once all slot futures are complete.
        // the future fails once one slot future fails.
        final ConjunctFuture allAllocationsComplete = FutureUtils.combineAll(slotFutures);

        // make sure that we fail if the allocation timeout was exceeded
        final ScheduledFuture<?> timeoutCancelHandle = futureExecutor.schedule(new Runnable() {
            @Override
            public void run() {
                // When the timeout triggers, we try to complete the conjunct future with an exception.
                // Note that this is a no-op if the future is already completed
                int numTotal = allAllocationsComplete.getNumFuturesTotal();
                int numComplete = allAllocationsComplete.getNumFuturesCompleted();
                String message = "Could not allocate all requires slots within timeout of " + timeout + ". Slots required: " + numTotal + ", slots allocated: " + numComplete;

                allAllocationsComplete.completeExceptionally(new NoResourceAvailableException(message));
            }
        }, timeout.getSize(), timeout.getUnit());

        allAllocationsComplete.handleAsync(new BiFunction<Void, Throwable, Void>() {
            @Override
            public Void apply(Void ignored, Throwable throwable) {
                try {
                    // we do not need the cancellation timeout any more
                    timeoutCancelHandle.cancel(false);

                    if (throwable == null) {
                        for (ExecutionAndSlot[] jobVertexTasks : resources) {
                            for (ExecutionAndSlot execAndSlot : jobVertexTasks) {
                                // the futures must all be ready - this is simply a sanity check
                                final SimpleSlot slot;
                                try {
                                    slot = execAndSlot.slotFuture.getNow(null);
                                    checkNotNull(slot);
                                } catch (ExecutionException | NullPointerException e) {
                                    // keep the original exception as the cause so the root
                                    // problem is not lost when the job is failed below
                                    throw new IllegalStateException("SlotFuture is incomplete " + "or erroneous even though all futures completed", e);
                                }

                                // actual deployment
                                execAndSlot.executionAttempt.deployToSlot(slot);
                            }
                        }
                    } else {
                        // let the exception handler deal with this
                        throw throwable;
                    }
                } catch (Throwable t) {
                    // we need to go into recovery and make sure to release all slots
                    try {
                        fail(t);
                    } finally {
                        ExecutionGraphUtils.releaseAllSlotsSilently(resources);
                    }
                }

                // the only value a Void-typed function can return
                return null;
            }
        }, futureExecutor);

        // from now on, slots will be rescued by the futures and their completion, or by the timeout
        successful = true;
    } finally {
        if (!successful) {
            // we come here only if the 'try' block finished with an exception
            // we release the slots (possibly failing some executions on the way) and
            // let the exception bubble up
            ExecutionGraphUtils.releaseAllSlotsSilently(resources);
        }
    }
}
Use of org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException in the Apache Flink project.
Class KafkaConsumerTestBase, method runFailOnDeployTest.
/**
 * Tests that a job reading from Kafka fails on deployment with a
 * {@link NoResourceAvailableException} when it is submitted with a parallelism
 * that is meant to exceed the number of slots of the test cluster.
 *
 * <p>The test topic is deleted in a {@code finally} block so that it is also cleaned
 * up when an assertion fails or an unexpected exception is thrown.
 *
 * @throws Exception if topic setup, job submission, or cleanup fails unexpectedly
 */
public void runFailOnDeployTest() throws Exception {
    final String topic = "failOnDeployTopic";

    createTestTopic(topic, 2, 1);

    try {
        DeserializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        // needs to be more than the mini cluster has slots
        env.setParallelism(12);
        env.getConfig().disableSysoutLogging();

        Properties props = new Properties();
        props.putAll(standardProps);
        props.putAll(secureProps);
        FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

        env.addSource(kafkaSource).addSink(new DiscardingSink<Integer>());

        try {
            env.execute("test fail on deploy");
            fail("this test should fail with an exception");
        } catch (ProgramInvocationException e) {
            // validate that we failed due to a NoResourceAvailableException;
            // the walk is bounded to 20 levels so a cyclic cause chain cannot loop forever
            Throwable cause = e.getCause();
            int depth = 0;
            boolean foundResourceException = false;

            while (cause != null && depth++ < 20) {
                if (cause instanceof NoResourceAvailableException) {
                    foundResourceException = true;
                    break;
                }
                cause = cause.getCause();
            }

            assertTrue("Wrong exception", foundResourceException);
        }
    } finally {
        // always remove the topic, even if the test itself failed
        deleteTestTopic(topic);
    }
}
Aggregations