Use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.
The class LocalInputPreferredSlotSharingStrategyTest, method testGetExecutionSlotSharingGroupOfLateAttachedVertices.
@Test
public void testGetExecutionSlotSharingGroupOfLateAttachedVertices() {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
JobVertexID jobVertexID3 = new JobVertexID();
final SlotSharingGroup slotSharingGroup1 = new SlotSharingGroup();
slotSharingGroup1.addVertexToGroup(jobVertexID1);
slotSharingGroup1.addVertexToGroup(jobVertexID2);
final SlotSharingGroup slotSharingGroup2 = new SlotSharingGroup();
slotSharingGroup2.addVertexToGroup(jobVertexID3);
TestingSchedulingTopology topology = new TestingSchedulingTopology();
TestingSchedulingExecutionVertex ev1 = topology.newExecutionVertex(jobVertexID1, 0);
TestingSchedulingExecutionVertex ev2 = topology.newExecutionVertex(jobVertexID2, 0);
topology.connect(ev1, ev2);
final LocalInputPreferredSlotSharingStrategy strategy =
        new LocalInputPreferredSlotSharingStrategy(
                topology,
                new HashSet<>(Arrays.asList(slotSharingGroup1, slotSharingGroup2)),
                Collections.emptySet());
assertThat(strategy.getExecutionSlotSharingGroups().size(), is(1));
assertThat(strategy.getExecutionSlotSharingGroup(ev1.getId()).getExecutionVertexIds(), containsInAnyOrder(ev1.getId(), ev2.getId()));
assertThat(strategy.getExecutionSlotSharingGroup(ev2.getId()).getExecutionVertexIds(), containsInAnyOrder(ev1.getId(), ev2.getId()));
// add new job vertices and notify scheduling topology updated
TestingSchedulingExecutionVertex ev3 = topology.newExecutionVertex(jobVertexID3, 0);
topology.connect(ev2, ev3, ResultPartitionType.BLOCKING);
strategy.notifySchedulingTopologyUpdated(topology, Collections.singletonList(ev3.getId()));
assertThat(strategy.getExecutionSlotSharingGroups().size(), is(2));
assertThat(strategy.getExecutionSlotSharingGroup(ev1.getId()).getExecutionVertexIds(), containsInAnyOrder(ev1.getId(), ev2.getId()));
assertThat(strategy.getExecutionSlotSharingGroup(ev2.getId()).getExecutionVertexIds(), containsInAnyOrder(ev1.getId(), ev2.getId()));
assertThat(strategy.getExecutionSlotSharingGroup(ev3.getId()).getExecutionVertexIds(), containsInAnyOrder(ev3.getId()));
}
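The test above only exercises SlotSharingGroup through addVertexToGroup. A minimal sketch of the same bookkeeping seen from the group's side; getJobVertexIds() is assumed here to expose the registered vertex IDs, and the printed check is illustrative rather than part of the test:
// A SlotSharingGroup simply records which JobVertexIDs belong together; the slot sharing
// strategy reads this membership when building ExecutionSlotSharingGroups.
SlotSharingGroup group = new SlotSharingGroup();
JobVertexID v1 = new JobVertexID();
JobVertexID v2 = new JobVertexID();
group.addVertexToGroup(v1);
group.addVertexToGroup(v2);
// Expected to print "true": both IDs are reported as members of the group.
System.out.println(group.getJobVertexIds().containsAll(Arrays.asList(v1, v2)));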
Use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.
The class StreamingJobGraphGenerator, method setManagedMemoryFraction.
private static void setManagedMemoryFraction(
        final Map<Integer, JobVertex> jobVertices,
        final Map<Integer, StreamConfig> operatorConfigs,
        final Map<Integer, Map<Integer, StreamConfig>> vertexChainedConfigs,
        final java.util.function.Function<Integer, Map<ManagedMemoryUseCase, Integer>> operatorScopeManagedMemoryUseCaseWeightsRetriever,
        final java.util.function.Function<Integer, Set<ManagedMemoryUseCase>> slotScopeManagedMemoryUseCasesRetriever) {
// all slot sharing groups in this job
final Set<SlotSharingGroup> slotSharingGroups = Collections.newSetFromMap(new IdentityHashMap<>());
// maps a job vertex ID to its head operator ID
final Map<JobVertexID, Integer> vertexHeadOperators = new HashMap<>();
// maps a job vertex ID to IDs of all operators in the vertex
final Map<JobVertexID, Set<Integer>> vertexOperators = new HashMap<>();
for (Map.Entry<Integer, JobVertex> entry : jobVertices.entrySet()) {
final int headOperatorId = entry.getKey();
final JobVertex jobVertex = entry.getValue();
final SlotSharingGroup jobVertexSlotSharingGroup = jobVertex.getSlotSharingGroup();
checkState(jobVertexSlotSharingGroup != null, "JobVertex slot sharing group must not be null");
slotSharingGroups.add(jobVertexSlotSharingGroup);
vertexHeadOperators.put(jobVertex.getID(), headOperatorId);
final Set<Integer> operatorIds = new HashSet<>();
operatorIds.add(headOperatorId);
operatorIds.addAll(vertexChainedConfigs.getOrDefault(headOperatorId, Collections.emptyMap()).keySet());
vertexOperators.put(jobVertex.getID(), operatorIds);
}
for (SlotSharingGroup slotSharingGroup : slotSharingGroups) {
setManagedMemoryFractionForSlotSharingGroup(
        slotSharingGroup,
        vertexHeadOperators,
        vertexOperators,
        operatorConfigs,
        vertexChainedConfigs,
        operatorScopeManagedMemoryUseCaseWeightsRetriever,
        slotScopeManagedMemoryUseCasesRetriever);
}
}
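A hypothetical sketch of how a call site inside the generator might supply the two retriever functions; the weight of 1 for ManagedMemoryUseCase.OPERATOR and the empty slot-scope set are illustrative assumptions, not the generator's actual values:
// Every operator declares weight 1 for the OPERATOR use case; no slot-scope use cases.
java.util.function.Function<Integer, Map<ManagedMemoryUseCase, Integer>> operatorScopeWeights =
        operatorId -> Collections.singletonMap(ManagedMemoryUseCase.OPERATOR, 1);
java.util.function.Function<Integer, Set<ManagedMemoryUseCase>> slotScopeUseCases =
        operatorId -> Collections.emptySet();
setManagedMemoryFraction(
        jobVertices, operatorConfigs, vertexChainedConfigs, operatorScopeWeights, slotScopeUseCases);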
Use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.
The class StreamingJobGraphGenerator, method setSlotSharing.
private void setSlotSharing() {
final Map<String, SlotSharingGroup> specifiedSlotSharingGroups = new HashMap<>();
final Map<JobVertexID, SlotSharingGroup> vertexRegionSlotSharingGroups = buildVertexRegionSlotSharingGroups();
for (Map.Entry<Integer, JobVertex> entry : jobVertices.entrySet()) {
final JobVertex vertex = entry.getValue();
final String slotSharingGroupKey = streamGraph.getStreamNode(entry.getKey()).getSlotSharingGroup();
checkNotNull(slotSharingGroupKey, "StreamNode slot sharing group must not be null");
final SlotSharingGroup effectiveSlotSharingGroup;
if (slotSharingGroupKey.equals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP)) {
// fallback to the region slot sharing group by default
effectiveSlotSharingGroup = checkNotNull(vertexRegionSlotSharingGroups.get(vertex.getID()));
} else {
effectiveSlotSharingGroup = specifiedSlotSharingGroups.computeIfAbsent(slotSharingGroupKey, k -> {
SlotSharingGroup ssg = new SlotSharingGroup();
streamGraph.getSlotSharingGroupResource(k).ifPresent(ssg::setResourceProfile);
return ssg;
});
}
vertex.setSlotSharingGroup(effectiveSlotSharingGroup);
}
}
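The slotSharingGroupKey checked above comes from the user program. A minimal sketch of how an operator ends up in a named, non-default slot sharing group via the DataStream API; the group name and the pre-existing DataStream<String> input are assumptions for illustration:
// Operators stay in the default group unless tagged explicitly; a non-default name takes
// the specifiedSlotSharingGroups branch in setSlotSharing().
DataStream<String> tagged = input
        .map(value -> value.toUpperCase())
        .slotSharingGroup("heavy-operators");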
Use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.
The class StreamingJobGraphGenerator, method setCoLocation.
private void setCoLocation() {
final Map<String, Tuple2<SlotSharingGroup, CoLocationGroupImpl>> coLocationGroups = new HashMap<>();
for (Map.Entry<Integer, JobVertex> entry : jobVertices.entrySet()) {
final StreamNode node = streamGraph.getStreamNode(entry.getKey());
final JobVertex vertex = entry.getValue();
final SlotSharingGroup sharingGroup = vertex.getSlotSharingGroup();
// configure co-location constraint
final String coLocationGroupKey = node.getCoLocationGroup();
if (coLocationGroupKey != null) {
if (sharingGroup == null) {
throw new IllegalStateException("Cannot use a co-location constraint without a slot sharing group");
}
Tuple2<SlotSharingGroup, CoLocationGroupImpl> constraint =
        coLocationGroups.computeIfAbsent(
                coLocationGroupKey, k -> new Tuple2<>(sharingGroup, new CoLocationGroupImpl()));
if (constraint.f0 != sharingGroup) {
throw new IllegalStateException("Cannot co-locate operators from different slot sharing groups");
}
vertex.updateCoLocationGroup(constraint.f1);
constraint.f1.addVertex(vertex);
}
}
}
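A minimal sketch of the runtime calls the loop above performs for one co-located pair, assuming both vertices were already placed in the same SlotSharingGroup by setSlotSharing(); the vertex names are illustrative:
// Both vertices must share a slot sharing group before a co-location constraint is legal.
SlotSharingGroup sharedGroup = new SlotSharingGroup();
JobVertex head = new JobVertex("IterationHead");
JobVertex tail = new JobVertex("IterationTail");
head.setSlotSharingGroup(sharedGroup);
tail.setSlotSharingGroup(sharedGroup);
// One CoLocationGroupImpl per co-location key; every vertex carrying that key joins it.
CoLocationGroupImpl coLocationGroup = new CoLocationGroupImpl();
head.updateCoLocationGroup(coLocationGroup);
coLocationGroup.addVertex(head);
tail.updateCoLocationGroup(coLocationGroup);
coLocationGroup.addVertex(tail);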
Use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.
The class TaskCancelAsyncProducerConsumerITCase, method testCancelAsyncProducerAndConsumer.
/**
* Tests that a task waiting on an async producer/consumer that is stuck in a blocking buffer
* request can be properly cancelled.
*
* <p>This is currently required for the Flink Kafka sources, which spawn a separate Thread
* consuming from Kafka and producing the intermediate streams in the spawned Thread instead of
* the main task Thread.
*/
@Test
public void testCancelAsyncProducerAndConsumer(@InjectMiniCluster MiniCluster flink) throws Exception {
Deadline deadline = Deadline.now().plus(Duration.ofMinutes(2));
// Job with async producer and consumer
JobVertex producer = new JobVertex("AsyncProducer");
producer.setParallelism(1);
producer.setInvokableClass(AsyncProducer.class);
JobVertex consumer = new JobVertex("AsyncConsumer");
consumer.setParallelism(1);
consumer.setInvokableClass(AsyncConsumer.class);
consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
SlotSharingGroup slot = new SlotSharingGroup();
producer.setSlotSharingGroup(slot);
consumer.setSlotSharingGroup(slot);
JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(producer, consumer);
// Submit job and wait until running
flink.runDetached(jobGraph);
FutureUtils.retrySuccessfulWithDelay(
                () -> flink.getJobStatus(jobGraph.getJobID()),
                Time.milliseconds(10),
                deadline,
                status -> status == JobStatus.RUNNING,
                TestingUtils.defaultScheduledExecutor())
        .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
boolean producerBlocked = false;
for (int i = 0; i < 50; i++) {
Thread thread = ASYNC_PRODUCER_THREAD;
if (thread != null && thread.isAlive()) {
StackTraceElement[] stackTrace = thread.getStackTrace();
producerBlocked = isInBlockingBufferRequest(stackTrace);
}
if (producerBlocked) {
break;
} else {
// Retry
Thread.sleep(500L);
}
}
// Verify that async producer is in blocking request
assertTrue("Producer thread is not blocked: " + Arrays.toString(ASYNC_PRODUCER_THREAD.getStackTrace()), producerBlocked);
boolean consumerWaiting = false;
for (int i = 0; i < 50; i++) {
Thread thread = ASYNC_CONSUMER_THREAD;
if (thread != null && thread.isAlive()) {
consumerWaiting = thread.getState() == Thread.State.WAITING;
}
if (consumerWaiting) {
break;
} else {
// Retry
Thread.sleep(500L);
}
}
// Verify that async consumer is in blocking request
assertTrue("Consumer thread is not blocked.", consumerWaiting);
flink.cancelJob(jobGraph.getJobID()).get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// wait until the job is canceled
FutureUtils.retrySuccessfulWithDelay(
                () -> flink.getJobStatus(jobGraph.getJobID()),
                Time.milliseconds(10),
                deadline,
                status -> status == JobStatus.CANCELED,
                TestingUtils.defaultScheduledExecutor())
        .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
// Verify the expected Exceptions
assertNotNull(ASYNC_PRODUCER_EXCEPTION);
assertEquals(CancelTaskException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
assertNotNull(ASYNC_CONSUMER_EXCEPTION);
assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
}
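isInBlockingBufferRequest is a helper of the test class and is not shown here. A plausible, non-authoritative sketch of such a check scans the stack trace for a frame in the buffer pool's blocking request path; the class and method names below are assumptions for illustration only:
// Illustrative guess at the helper: look for a LocalBufferPool.requestMemorySegment... frame.
private static boolean looksLikeBlockingBufferRequest(StackTraceElement[] stackTrace) {
    for (StackTraceElement element : stackTrace) {
        if (element.getClassName().endsWith("LocalBufferPool")
                && element.getMethodName().startsWith("requestMemorySegment")) {
            return true;
        }
    }
    return false;
}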