Example 16 with OperatorIDPair

use of org.apache.flink.runtime.OperatorIDPair in project flink by apache.

the class CheckpointCoordinatorRestoringTest method testRestoreLatestCheckpointedStateWithChangingParallelism.

/**
 * Tests the checkpoint restoration with changing parallelism of job vertex with partitioned
 * state.
 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;
    CompletedCheckpointStore completedCheckpointStore = new EmbeddedCompletedCheckpointStore();
    final ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1, parallelism1, maxParallelism1)
                    .addJobVertex(jobVertexID2, parallelism2, maxParallelism2)
                    .build();
    final ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
    final ExecutionJobVertex jobVertex2 = graph.getJobVertex(jobVertexID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCompletedCheckpointStore(completedCheckpointStore)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    // trigger the checkpoint
    coord.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertEquals(1, coord.getPendingCheckpoints().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    // vertex 1
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
        OperatorSubtaskState operatorSubtaskState =
                OperatorSubtaskState.builder()
                        .setManagedOperatorState(opStateBackend)
                        .setManagedKeyedState(keyedStateBackend)
                        .setRawKeyedState(keyedStateRaw)
                        .setInputChannelState(
                                StateObjectCollection.singleton(
                                        createNewInputChannelStateHandle(3, new Random())))
                        .build();
        TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
        taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID1), operatorSubtaskState);
        AcknowledgeCheckpoint acknowledgeCheckpoint =
                new AcknowledgeCheckpoint(
                        graph.getJobID(),
                        jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
                        checkpointId,
                        new CheckpointMetrics(),
                        taskOperatorSubtaskStates);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
    }
    // vertex 2
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        OperatorStateHandle opStateRaw = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(new ChainedStateHandle<>(singletonList(opStateBackend)));
        expectedOpStatesRaw.add(new ChainedStateHandle<>(singletonList(opStateRaw)));
        OperatorSubtaskState operatorSubtaskState =
                OperatorSubtaskState.builder()
                        .setManagedOperatorState(opStateBackend)
                        .setRawOperatorState(opStateRaw)
                        .setManagedKeyedState(keyedStateBackend)
                        .setRawKeyedState(keyedStateRaw)
                        .build();
        TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
        taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID2), operatorSubtaskState);
        AcknowledgeCheckpoint acknowledgeCheckpoint =
                new AcknowledgeCheckpoint(
                        graph.getJobID(),
                        jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
                        checkpointId,
                        new CheckpointMetrics(),
                        taskOperatorSubtaskStates);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
    // rescale vertex 2
    final ExecutionGraph newGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1, parallelism1, maxParallelism1)
                    .addJobVertex(jobVertexID2, newParallelism2, maxParallelism2)
                    .build();
    final ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(jobVertexID1);
    final ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(jobVertexID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator newCoord =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(newGraph)
                    .setCompletedCheckpointStore(completedCheckpointStore)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(newJobVertex1);
    tasks.add(newJobVertex2);
    assertTrue(newCoord.restoreLatestCheckpointedStateToAll(tasks, false));
    // verify the restored state
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
        JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertEquals(1L, taskRestore.getRestoreCheckpointId());
        TaskStateSnapshot taskStateHandles = taskRestore.getTaskStateSnapshot();
        final int headOpIndex = operatorIDs.size() - 1;
        List<Collection<OperatorStateHandle>> allParallelManagedOpStates = new ArrayList<>(operatorIDs.size());
        List<Collection<OperatorStateHandle>> allParallelRawOpStates = new ArrayList<>(operatorIDs.size());
        for (int idx = 0; idx < operatorIDs.size(); ++idx) {
            OperatorID operatorID = operatorIDs.get(idx).getGeneratedOperatorID();
            OperatorSubtaskState opState = taskStateHandles.getSubtaskStateByOperatorID(operatorID);
            Collection<OperatorStateHandle> opStateBackend = opState.getManagedOperatorState();
            Collection<OperatorStateHandle> opStateRaw = opState.getRawOperatorState();
            allParallelManagedOpStates.add(opStateBackend);
            allParallelRawOpStates.add(opStateRaw);
            if (idx == headOpIndex) {
                Collection<KeyedStateHandle> keyedStateBackend = opState.getManagedKeyedState();
                Collection<KeyedStateHandle> keyGroupStateRaw = opState.getRawKeyedState();
                compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
                compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
            }
        }
        actualOpStatesBackend.add(allParallelManagedOpStates);
        actualOpStatesRaw.add(allParallelRawOpStates);
    }
    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) Random(java.util.Random) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Collections.singletonList(java.util.Collections.singletonList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair)
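
The test keys each subtask's state by OperatorID.fromJobVertexID(...), which works because a vertex whose operators have no user-defined uid carries an OperatorIDPair that only holds the generated ID derived from the vertex ID. The following minimal sketch, not part of the test above, illustrates that relationship; it assumes the generatedIDOnly factory on OperatorIDPair, and the class name OperatorIdPairSketch is made up for illustration.

import java.util.Optional;

import org.apache.flink.runtime.OperatorIDPair;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;

public class OperatorIdPairSketch {

    public static void main(String[] args) {
        JobVertexID vertexId = new JobVertexID();

        // Without a user-defined uid, the generated operator ID is derived from the vertex ID,
        // matching the key the test stores acknowledged subtask state under.
        OperatorID generated = OperatorID.fromJobVertexID(vertexId);
        OperatorIDPair pair = OperatorIDPair.generatedIDOnly(generated); // assumed factory method

        // Resolve the effective ID the way consumers of the pair do:
        // user-defined if present, otherwise the generated one.
        Optional<OperatorID> userDefined = pair.getUserDefinedOperatorID();
        OperatorID effective = userDefined.orElse(pair.getGeneratedOperatorID());

        System.out.println(effective.equals(generated)); // prints true
    }
}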

Example 17 with OperatorIDPair

use of org.apache.flink.runtime.OperatorIDPair in project flink by apache.

the class OperatorCoordinatorSchedulerTest method setupTestJobAndScheduler.

private DefaultScheduler setupTestJobAndScheduler(
        OperatorCoordinator.Provider provider,
        @Nullable TaskExecutorOperatorEventGateway taskExecutorOperatorEventGateway,
        @Nullable Consumer<JobGraph> jobGraphPreProcessing,
        boolean restartAllOnFailover)
        throws Exception {
    final OperatorIDPair opIds = OperatorIDPair.of(new OperatorID(), provider.getOperatorId());
    final JobVertex vertex = new JobVertex("Vertex with OperatorCoordinator", testVertexId, Collections.singletonList(opIds));
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.addOperatorCoordinator(new SerializedValue<>(provider));
    vertex.setParallelism(2);
    final JobGraph jobGraph = JobGraphBuilder.newStreamingJobGraphBuilder().addJobVertex(vertex).build();
    SchedulerTestingUtils.enableCheckpointing(jobGraph);
    if (jobGraphPreProcessing != null) {
        jobGraphPreProcessing.accept(jobGraph);
    }
    final ComponentMainThreadExecutor mainThreadExecutor = new ComponentMainThreadExecutorServiceAdapter((ScheduledExecutorService) executor, Thread.currentThread());
    final SchedulerTestingUtils.DefaultSchedulerBuilder schedulerBuilder =
            taskExecutorOperatorEventGateway == null
                    ? SchedulerTestingUtils.createSchedulerBuilder(jobGraph, mainThreadExecutor)
                    : SchedulerTestingUtils.createSchedulerBuilder(
                            jobGraph, mainThreadExecutor, taskExecutorOperatorEventGateway);
    if (restartAllOnFailover) {
        schedulerBuilder.setFailoverStrategyFactory(new RestartAllFailoverStrategy.Factory());
    }
    final DefaultScheduler scheduler = schedulerBuilder.setFutureExecutor(executor).setDelayExecutor(executor).build();
    this.createdScheduler = scheduler;
    return scheduler;
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) RestartAllFailoverStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.RestartAllFailoverStrategy) ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) SchedulerTestingUtils(org.apache.flink.runtime.scheduler.SchedulerTestingUtils) DefaultScheduler(org.apache.flink.runtime.scheduler.DefaultScheduler) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair)
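
As a complement, here is a hedged, stand-alone sketch of the OperatorIDPair wiring this setup relies on: pairing a freshly generated OperatorID with a second ID that plays the role of provider.getOperatorId(), then handing the pair to the JobVertex. The vertex name, both IDs, and the class name CoordinatorVertexSketch are invented for illustration and are not taken from the test.

import java.util.Collections;

import org.apache.flink.runtime.OperatorIDPair;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;

public class CoordinatorVertexSketch {

    public static void main(String[] args) {
        // The generated ID identifies the operator inside the runtime; the second ID is a
        // hypothetical stand-in for provider.getOperatorId(), i.e. the ID the coordinator uses.
        OperatorID generatedId = new OperatorID();
        OperatorID coordinatorOperatorId = new OperatorID();

        OperatorIDPair opIds = OperatorIDPair.of(generatedId, coordinatorOperatorId);

        // The pair list is handed to the vertex, so later lookups can resolve either ID.
        JobVertex vertex =
                new JobVertex("Sketch vertex", new JobVertexID(), Collections.singletonList(opIds));

        System.out.println(vertex.getOperatorIDs().size()); // prints 1
        System.out.println(opIds.getUserDefinedOperatorID().isPresent()); // prints true
    }
}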

Example 18 with OperatorIDPair

use of org.apache.flink.runtime.OperatorIDPair in project flink by apache.

the class DrainingValidator method getNumInputs.

private static int getNumInputs(TestJobWithDescription testJob, String operator) {
    Integer explicitNumInputs = testJob.operatorsNumberOfInputs.get(operator);
    if (explicitNumInputs != null) {
        return explicitNumInputs;
    }
    Iterable<JobVertex> vertices = testJob.jobGraph.getVertices();
    for (JobVertex vertex : vertices) {
        for (OperatorIDPair p : vertex.getOperatorIDs()) {
            OperatorID operatorID = p.getUserDefinedOperatorID().orElse(p.getGeneratedOperatorID());
            if (operatorID.toString().equals(operator)) {
                // the operator runs inside this vertex, so use the vertex's number of network inputs
                return vertex.getNumberOfInputs();
            }
        }
    }
    throw new NoSuchElementException(operator);
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) NoSuchElementException(java.util.NoSuchElementException) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair)
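
getNumInputs resolves an operator's effective ID with getUserDefinedOperatorID().orElse(getGeneratedOperatorID()) and rescans every vertex on each call. The sketch below, not part of DrainingValidator, builds a one-time index from effective operator ID string to containing JobVertex using the same idiom; the class and method names (OperatorLookupSketch, indexOperators) are made up.

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.runtime.OperatorIDPair;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.OperatorID;

public final class OperatorLookupSketch {

    /**
     * Builds an index from each operator's effective ID string (user-defined if present,
     * otherwise generated) to the JobVertex that contains it, instead of rescanning per lookup.
     */
    public static Map<String, JobVertex> indexOperators(JobGraph jobGraph) {
        Map<String, JobVertex> byOperatorId = new HashMap<>();
        for (JobVertex vertex : jobGraph.getVertices()) {
            for (OperatorIDPair pair : vertex.getOperatorIDs()) {
                OperatorID effective =
                        pair.getUserDefinedOperatorID().orElse(pair.getGeneratedOperatorID());
                byOperatorId.put(effective.toString(), vertex);
            }
        }
        return byOperatorId;
    }
}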

Aggregations

OperatorIDPair (org.apache.flink.runtime.OperatorIDPair) 18
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex) 12
OperatorID (org.apache.flink.runtime.jobgraph.OperatorID) 11
HashMap (java.util.HashMap) 7
HashSet (java.util.HashSet) 5
ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph) 5
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID) 5
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex) 4
ArrayList (java.util.ArrayList) 3
CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) 3
Collection (java.util.Collection) 2
Collections.emptyList (java.util.Collections.emptyList) 2
Collections.singletonList (java.util.Collections.singletonList) 2
List (java.util.List) 2
Execution (org.apache.flink.runtime.executiongraph.Execution) 2
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 2
AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) 2
ChainedStateHandle (org.apache.flink.runtime.state.ChainedStateHandle) 2
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange) 2
KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle) 2