Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.
From the class ScheduleWithCoLocationHintTest, method testSlotReleasedInBetween:
@Test
public void testSlotReleasedInBetween() {
    try {
        JobVertexID jid1 = new JobVertexID();
        JobVertexID jid2 = new JobVertexID();
        Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
        Instance i1 = getRandomInstance(1);
        Instance i2 = getRandomInstance(1);
        TaskManagerLocation loc1 = i1.getTaskManagerLocation();
        TaskManagerLocation loc2 = i2.getTaskManagerLocation();
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i1);
        assertEquals(2, scheduler.getNumberOfAvailableSlots());

        SlotSharingGroup sharingGroup = new SlotSharingGroup();
        CoLocationGroup ccg = new CoLocationGroup();
        CoLocationConstraint cc1 = new CoLocationConstraint(ccg);
        CoLocationConstraint cc2 = new CoLocationConstraint(ccg);

        SimpleSlot s1 = scheduler.allocateSlot(
                new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup, cc1), false).get();
        SimpleSlot s2 = scheduler.allocateSlot(
                new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup, cc2), false).get();
        s1.releaseSlot();
        s2.releaseSlot();
        assertEquals(2, scheduler.getNumberOfAvailableSlots());
        assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots());

        SimpleSlot s3 = scheduler.allocateSlot(
                new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc2), sharingGroup, cc1), false).get();
        SimpleSlot s4 = scheduler.allocateSlot(
                new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc1), sharingGroup, cc2), false).get();

        // the co-location constraints still preserve the previous instance mapping
        assertEquals(i1.getTaskManagerID(), s3.getTaskManagerID());
        assertEquals(i2.getTaskManagerID(), s4.getTaskManagerID());

        s3.releaseSlot();
        s4.releaseSlot();
        assertEquals(2, scheduler.getNumberOfAvailableSlots());
        assertEquals(4, scheduler.getNumberOfLocalizedAssignments());
        assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments());
        assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
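The pattern the test exercises can be distilled into a few lines. The sketch below is illustrative and not part of the Flink test suite: it assumes the same harness (a running Scheduler, the getTestVertex helper from these tests) and the hypothetical vertex ids jidA and jidB. It mirrors exactly what the assertions above verify: allocations made under one CoLocationConstraint resolve to the same TaskManager, even across a release.

    // minimal sketch (hypothetical, reusing the test harness above): slots requested
    // under the same CoLocationConstraint land on the same TaskManager, and the
    // constraint remembers its location across a release/re-allocate cycle
    CoLocationGroup group = new CoLocationGroup();
    CoLocationConstraint constraint = new CoLocationConstraint(group);
    SlotSharingGroup sharing = new SlotSharingGroup();

    SimpleSlot first = scheduler.allocateSlot(
            new ScheduledUnit(getTestVertex(jidA, 0, 1), sharing, constraint), false).get();
    first.releaseSlot();

    // a later request under the same constraint is routed back to the same TaskManager
    SimpleSlot second = scheduler.allocateSlot(
            new ScheduledUnit(getTestVertex(jidB, 0, 1), sharing, constraint), false).get();
    assertEquals(first.getTaskManagerID(), second.getTaskManagerID());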
Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.
From the class SchedulerSlotSharingTest, method testSequentialAllocateAndRelease:
@Test
public void testSequentialAllocateAndRelease() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();
        final SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3, jid4);
        final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        scheduler.newInstanceAvailable(getRandomInstance(4));

        // allocate something from groups 1 and 2, interleaved with a schedule for group 3
        SimpleSlot slot_1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup), false).get();
        SimpleSlot slot_1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_1_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false).get();
        SimpleSlot slot_2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_2_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4), sharingGroup), false).get();

        // release groups 1 and 2
        slot_1_1.releaseSlot();
        slot_1_2.releaseSlot();
        slot_1_3.releaseSlot();
        slot_1_4.releaseSlot();
        slot_2_1.releaseSlot();
        slot_2_2.releaseSlot();
        slot_2_3.releaseSlot();
        slot_2_4.releaseSlot();

        // allocate group 4
        SimpleSlot slot_4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_4_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4), sharingGroup), false).get();

        // release groups 3 and 4
        slot_3.releaseSlot();
        slot_4_1.releaseSlot();
        slot_4_2.releaseSlot();
        slot_4_3.releaseSlot();
        slot_4_4.releaseSlot();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
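Since slot sharing lets one task slot host at most one subtask per JobVertexID in the group, the nine allocations above (four for jid1, four for jid2, one for jid3) fit into the four slots of the single instance. A hedged post-condition one could append to the test (it is not in the original) is that all four slots are free again once the final releases complete:

    // illustrative addition, not part of the original test: after releasing
    // groups 3 and 4, every slot of the 4-slot instance is available again
    assertEquals(4, scheduler.getNumberOfAvailableSlots());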
Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.
From the class StreamTaskTest, method createTask:
public static Task createTask(
        Class<? extends AbstractInvokable> invokable,
        StreamConfig taskConfig,
        Configuration taskManagerConfig) throws Exception {

    LibraryCacheManager libCache = mock(LibraryCacheManager.class);
    when(libCache.getClassLoader(any(JobID.class))).thenReturn(StreamTaskTest.class.getClassLoader());

    ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
    ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
    PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
    Executor executor = mock(Executor.class);

    NetworkEnvironment network = mock(NetworkEnvironment.class);
    when(network.getResultPartitionManager()).thenReturn(partitionManager);
    when(network.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
    when(network.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class)))
            .thenReturn(mock(TaskKvStateRegistry.class));

    JobInformation jobInformation = new JobInformation(
            new JobID(),
            "Job Name",
            new SerializedValue<>(new ExecutionConfig()),
            new Configuration(),
            Collections.<BlobKey>emptyList(),
            Collections.<URL>emptyList());

    TaskInformation taskInformation = new TaskInformation(
            new JobVertexID(),
            "Test Task",
            1,
            1,
            invokable.getName(),
            taskConfig.getConfiguration());

    return new Task(
            jobInformation,
            taskInformation,
            new ExecutionAttemptID(),
            new AllocationID(),
            0,
            0,
            Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
            Collections.<InputGateDeploymentDescriptor>emptyList(),
            0,
            new TaskStateHandles(),
            mock(MemoryManager.class),
            mock(IOManager.class),
            network,
            mock(BroadcastVariableManager.class),
            mock(TaskManagerActions.class),
            mock(InputSplitProvider.class),
            mock(CheckpointResponder.class),
            libCache,
            mock(FileCache.class),
            new TestingTaskManagerRuntimeInfo(taskManagerConfig, new String[] { System.getProperty("java.io.tmpdir") }),
            new UnregisteredTaskMetricsGroup(),
            consumableNotifier,
            partitionProducerStateChecker,
            executor);
}
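A hedged usage sketch for the helper: the configuration values are stand-ins, and the choice of SourceStreamTask as the invokable is only an example (any AbstractInvokable subclass works), but startTaskThread() and getExecutingThread() are real Task methods.

    // hypothetical caller: run an invokable through the mocked runtime plumbing
    StreamConfig cfg = new StreamConfig(new Configuration());
    Task task = createTask(SourceStreamTask.class, cfg, new Configuration());
    task.startTaskThread();
    task.getExecutingThread().join();   // wait for the task to finish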
Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.
From the class SavepointLoader, method loadAndValidateSavepoint:
/**
* Loads a savepoint back as a {@link CompletedCheckpoint}.
*
* <p>This method verifies that tasks and parallelism still match the savepoint parameters.
*
* @param jobId The JobID of the job to load the savepoint for.
* @param tasks Tasks that will possibly be reset
* @param savepointPath The path of the savepoint to rollback to
* @param classLoader The class loader to resolve serialized classes in legacy savepoint versions.
* @param allowNonRestoredState Allow to skip checkpoint state that cannot be mapped
* to any job vertex in tasks.
*
* @throws IllegalStateException If mismatch between program and savepoint state
* @throws IOException If savepoint store failure
*/
public static CompletedCheckpoint loadAndValidateSavepoint(
        JobID jobId,
        Map<JobVertexID, ExecutionJobVertex> tasks,
        String savepointPath,
        ClassLoader classLoader,
        boolean allowNonRestoredState) throws IOException {

    // (1) load the savepoint
    final Tuple2<Savepoint, StreamStateHandle> savepointAndHandle =
            SavepointStore.loadSavepointWithHandle(savepointPath, classLoader);
    final Savepoint savepoint = savepointAndHandle.f0;
    final StreamStateHandle metadataHandle = savepointAndHandle.f1;

    final Map<JobVertexID, TaskState> taskStates = new HashMap<>(savepoint.getTaskStates().size());
    boolean expandedToLegacyIds = false;

    // (2) validate it (parallelism, etc)
    for (TaskState taskState : savepoint.getTaskStates()) {
        ExecutionJobVertex executionJobVertex = tasks.get(taskState.getJobVertexID());

        // the state may be keyed by a legacy JobVertexID,
        // for example as generated from older flink versions, to provide backwards compatibility
        if (executionJobVertex == null && !expandedToLegacyIds) {
            tasks = ExecutionJobVertex.includeLegacyJobVertexIDs(tasks);
            executionJobVertex = tasks.get(taskState.getJobVertexID());
            expandedToLegacyIds = true;
            LOG.info("Could not find ExecutionJobVertex. Including legacy JobVertexIDs in search.");
        }

        if (executionJobVertex != null) {
            if (executionJobVertex.getMaxParallelism() == taskState.getMaxParallelism()
                    || !executionJobVertex.isMaxParallelismConfigured()) {
                taskStates.put(taskState.getJobVertexID(), taskState);
            } else {
                String msg = String.format("Failed to rollback to savepoint %s. " +
                        "Max parallelism mismatch between savepoint state and new program. " +
                        "Cannot map operator %s with max parallelism %d to new program with " +
                        "max parallelism %d. This indicates that the program has been changed " +
                        "in a non-compatible way after the savepoint.",
                        savepoint, taskState.getJobVertexID(),
                        taskState.getMaxParallelism(), executionJobVertex.getMaxParallelism());
                throw new IllegalStateException(msg);
            }
        } else if (allowNonRestoredState) {
            LOG.info("Skipping savepoint state for operator {}.", taskState.getJobVertexID());
        } else {
            String msg = String.format("Failed to rollback to savepoint %s. " +
                    "Cannot map savepoint state for operator %s to the new program, " +
                    "because the operator is not available in the new program. If " +
                    "you want to allow to skip this, you can set the --allowNonRestoredState " +
                    "option on the CLI.",
                    savepointPath, taskState.getJobVertexID());
            throw new IllegalStateException(msg);
        }
    }

    // (3) convert to checkpoint so the system can fall back to it
    CheckpointProperties props = CheckpointProperties.forStandardSavepoint();
    return new CompletedCheckpoint(
            jobId, savepoint.getCheckpointId(), 0L, 0L, taskStates, props, metadataHandle, savepointPath);
}
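A hedged sketch of the call site: the surrounding recovery code is assumed to provide the ExecutionGraph, the savepoint path, and the user class loader, and the argument order follows the signature above.

    // hypothetical restore path during job (re)submission
    Map<JobVertexID, ExecutionJobVertex> tasks = executionGraph.getAllVertices();
    CompletedCheckpoint restored = SavepointLoader.loadAndValidateSavepoint(
            executionGraph.getJobID(),
            tasks,
            savepointPath,
            userClassLoader,
            false);   // allowNonRestoredState: fail if savepoint state has no matching vertex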
Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.
From the class PendingCheckpoint, method acknowledgeTask:
/**
* Acknowledges the task with the given execution attempt id and the given subtask state.
*
* @param executionAttemptId of the acknowledged task
* @param subtaskState of the acknowledged task
* @param metrics Checkpoint metrics for the stats
* @return TaskAcknowledgeResult of the operation
*/
public TaskAcknowledgeResult acknowledgeTask(
        ExecutionAttemptID executionAttemptId,
        SubtaskState subtaskState,
        CheckpointMetrics metrics) {

    synchronized (lock) {
        if (discarded) {
            return TaskAcknowledgeResult.DISCARDED;
        }

        final ExecutionVertex vertex = notYetAcknowledgedTasks.remove(executionAttemptId);
        if (vertex == null) {
            if (acknowledgedTasks.contains(executionAttemptId)) {
                return TaskAcknowledgeResult.DUPLICATE;
            } else {
                return TaskAcknowledgeResult.UNKNOWN;
            }
        } else {
            acknowledgedTasks.add(executionAttemptId);
        }

        JobVertexID jobVertexID = vertex.getJobvertexId();
        int subtaskIndex = vertex.getParallelSubtaskIndex();
        long ackTimestamp = System.currentTimeMillis();
        long stateSize = 0;

        if (null != subtaskState) {
            TaskState taskState = taskStates.get(jobVertexID);
            if (null == taskState) {
                @SuppressWarnings("deprecation")
                ChainedStateHandle<StreamStateHandle> nonPartitionedState = subtaskState.getLegacyOperatorState();
                ChainedStateHandle<OperatorStateHandle> partitioneableState = subtaskState.getManagedOperatorState();
                // TODO this should go away when we remove chained state, assigning state to operators directly instead
                int chainLength;
                if (nonPartitionedState != null) {
                    chainLength = nonPartitionedState.getLength();
                } else if (partitioneableState != null) {
                    chainLength = partitioneableState.getLength();
                } else {
                    chainLength = 1;
                }
                taskState = new TaskState(
                        jobVertexID,
                        vertex.getTotalNumberOfParallelSubtasks(),
                        vertex.getMaxParallelism(),
                        chainLength);
                taskStates.put(jobVertexID, taskState);
            }
            taskState.putState(subtaskIndex, subtaskState);
            stateSize = subtaskState.getStateSize();
        }

        ++numAcknowledgedTasks;

        // publish the checkpoint statistics
        // to prevent null-pointers from concurrent modification, copy reference onto stack
        final PendingCheckpointStats statsCallback = this.statsCallback;
        if (statsCallback != null) {
            // Do this in millis because the web frontend works with them
            long alignmentDurationMillis = metrics.getAlignmentDurationNanos() / 1_000_000;
            SubtaskStateStats subtaskStateStats = new SubtaskStateStats(
                    subtaskIndex,
                    ackTimestamp,
                    stateSize,
                    metrics.getSyncDurationMillis(),
                    metrics.getAsyncDurationMillis(),
                    metrics.getBytesBufferedInAlignment(),
                    alignmentDurationMillis);
            statsCallback.reportSubtaskStats(jobVertexID, subtaskStateStats);
        }

        return TaskAcknowledgeResult.SUCCESS;
    }
}
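On the caller side, the returned TaskAcknowledgeResult drives what happens next. The sketch below is illustrative: `checkpoint` is a PendingCheckpoint, and the accessor names on `message` are assumed from the acknowledge message a task sends; only the enum constants are taken directly from the method above.

    // hedged sketch of a coordinator-style caller reacting to the result
    TaskAcknowledgeResult result = checkpoint.acknowledgeTask(
            message.getTaskExecutionId(), message.getSubtaskState(), message.getCheckpointMetrics());

    switch (result) {
        case SUCCESS:
            if (checkpoint.isFullyAcknowledged()) {
                // every task has reported back; the checkpoint can be finalized
            }
            break;
        case DUPLICATE:    // this execution attempt already acknowledged
        case UNKNOWN:      // the attempt id does not belong to this checkpoint
        case DISCARDED:    // the checkpoint was aborted in the meantime
            break;
    }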