Search in sources :

Example 16 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in the Apache Flink project.

From the class StateAssignmentOperationTest, method testRepartitionBroadcastStateWithEmptySubtaskState.

/**
 * Runs the partitionable-state rescale verification on an operator state in which subtask 0
 * reports two broadcast states while subtask 1 reports an empty snapshot.
 */
@Test
public void testRepartitionBroadcastStateWithEmptySubtaskState() {
    final OperatorID operatorID = new OperatorID();
    final OperatorState operatorState = new OperatorState(operatorID, 2, 4);

    // Subtask 0 is the only one that reports state: two broadcast states backed by one handle.
    final OperatorStateHandle.StateMetaInfo firstBroadcastInfo =
            new OperatorStateHandle.StateMetaInfo(
                    new long[] { 0, 10, 20 }, OperatorStateHandle.Mode.BROADCAST);
    final OperatorStateHandle.StateMetaInfo secondBroadcastInfo =
            new OperatorStateHandle.StateMetaInfo(
                    new long[] { 30, 40, 50 }, OperatorStateHandle.Mode.BROADCAST);
    final Map<String, OperatorStateHandle.StateMetaInfo> broadcastMetaInfos = new HashMap<>(2);
    broadcastMetaInfos.put("t-5", firstBroadcastInfo);
    broadcastMetaInfos.put("t-6", secondBroadcastInfo);

    final ByteStreamStateHandle backingBytes = new ByteStreamStateHandle("test1", new byte[60]);
    final OperatorStateHandle reportedHandle =
            new OperatorStreamStateHandle(broadcastMetaInfos, backingBytes);
    operatorState.putState(
            0, OperatorSubtaskState.builder().setManagedOperatorState(reportedHandle).build());

    // Subtask 1 reports an empty snapshot.
    operatorState.putState(1, OperatorSubtaskState.builder().build());

    verifyOneKindPartitionableStateRescale(operatorState, operatorID);
}
Also used : HashMap(java.util.HashMap) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Test(org.junit.Test)

Example 17 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in the Apache Flink project.

From the class StateAssignmentOperationTest, method assigningStatesShouldWorkWithUserDefinedOperatorIdsAsWell.

/**
 * Checks that state registered under the user-defined operator ID is what ends up assigned to
 * the operator when it is looked up by its other operator ID.
 */
@Test
public void assigningStatesShouldWorkWithUserDefinedOperatorIdsAsWell() {
    final int subtaskCount = 1;
    final OperatorID generatedOperatorId = new OperatorID();
    final OperatorID userDefinedOperatorId = new OperatorID();

    // The stored state is keyed by the user-defined ID only.
    final List<OperatorID> stateOperatorIds = singletonList(userDefinedOperatorId);
    final ExecutionJobVertex jobVertex =
            buildExecutionJobVertex(generatedOperatorId, userDefinedOperatorId, 1);
    final Map<OperatorID, OperatorState> operatorStates =
            buildOperatorStates(stateOperatorIds, subtaskCount);

    final StateAssignmentOperation assignment =
            new StateAssignmentOperation(
                    0, Collections.singleton(jobVertex), operatorStates, false);
    assignment.assignStates();

    Assert.assertEquals(
            operatorStates.get(userDefinedOperatorId).getState(0),
            getAssignedState(jobVertex, generatedOperatorId, 0));
}
Also used : ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Test(org.junit.Test)

Example 18 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in the Apache Flink project.

From the class StateAssignmentOperationTest, method testChannelStateAssignmentNoRescale.

/**
 * Assigns channel state to two operators built with two subtasks each and checks that every
 * subtask is matched with the same-index old subtask and that no rescaling descriptor is set.
 */
@Test
public void testChannelStateAssignmentNoRescale() throws JobException, JobExecutionException {
    final List<OperatorID> ids = buildOperatorIds(2);
    final Map<OperatorID, OperatorState> operatorStates = buildOperatorStates(ids, 2);
    final Map<OperatorID, ExecutionJobVertex> jobVertices =
            buildVertices(ids, 2, RANGE, ROUND_ROBIN);

    final StateAssignmentOperation assignment =
            new StateAssignmentOperation(
                    0, new HashSet<>(jobVertices.values()), operatorStates, false);
    assignment.assignStates();

    for (OperatorID id : ids) {
        // input is range partitioned, so there is an overlap
        for (int subtask = 0; subtask < 2; subtask++) {
            assertState(
                    jobVertices,
                    id,
                    operatorStates,
                    subtask,
                    OperatorSubtaskState::getInputChannelState,
                    subtask);
        }
        // output is round robin redistributed
        for (int subtask = 0; subtask < 2; subtask++) {
            assertState(
                    jobVertices,
                    id,
                    operatorStates,
                    subtask,
                    OperatorSubtaskState::getResultSubpartitionState,
                    subtask);
        }
    }

    // The first operator is checked on its output side, the second one on its input side.
    final OperatorID firstId = ids.get(0);
    final ExecutionJobVertex firstVertex = jobVertices.get(firstId);
    for (int subtask = 0; subtask < 2; subtask++) {
        assertEquals(
                InflightDataRescalingDescriptor.NO_RESCALE,
                getAssignedState(firstVertex, firstId, subtask).getOutputRescalingDescriptor());
    }

    final OperatorID secondId = ids.get(1);
    final ExecutionJobVertex secondVertex = jobVertices.get(secondId);
    for (int subtask = 0; subtask < 2; subtask++) {
        assertEquals(
                InflightDataRescalingDescriptor.NO_RESCALE,
                getAssignedState(secondVertex, secondId, subtask).getInputRescalingDescriptor());
    }
}
Also used : ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 19 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in the Apache Flink project.

From the class StateAssignmentOperationTest, method verifyAndCollectStateInfo.

// ------------------------------------------------------------------------
/**
 * Redistributes the managed operator state of {@code operatorState} to {@code newParallelism}
 * sub-tasks and verifies, for every new sub-task, how many offsets each state received
 * depending on its distribution mode.
 *
 * <p>Expected number of offsets per state name and sub-task:
 *
 * <ul>
 *   <li>{@code SPLIT_DISTRIBUTE}: 1 when {@code oldParallelism < newParallelism}, otherwise 2
 *   <li>{@code UNION}: 2
 *   <li>any other mode (i.e. {@code BROADCAST}): 3
 * </ul>
 *
 * @param operatorState the state to redistribute
 * @param operatorID the ID under which {@code operatorState} is handed to the redistribution
 * @param oldParallelism parallelism before rescaling; only used to pick the expected
 *     {@code SPLIT_DISTRIBUTE} offset count
 * @param newParallelism parallelism the state is redistributed to
 * @param stateInfoCounts accumulator of state name -> number of operator state handles that
 *     the new sub-tasks would use in total; incremented once per handle containing the state
 */
private void verifyAndCollectStateInfo(OperatorState operatorState, OperatorID operatorID, int oldParallelism, int newParallelism, Map<String, Integer> stateInfoCounts) {
    final Map<OperatorInstanceID, List<OperatorStateHandle>> redistributed = new HashMap<>();
    StateAssignmentOperation.reDistributePartitionableStates(
            Collections.singletonMap(operatorID, operatorState),
            newParallelism,
            OperatorSubtaskState::getManagedOperatorState,
            RoundRobinOperatorStateRepartitioner.INSTANCE,
            redistributed);

    // Check the redistributed managed operator state one new sub-task at a time.
    for (List<OperatorStateHandle> handlesOfSubtask : redistributed.values()) {
        // mode -> (state name -> total number of offsets this sub-task received)
        final EnumMap<OperatorStateHandle.Mode, Map<String, Integer>> offsetCountsByMode =
                new EnumMap<>(OperatorStateHandle.Mode.class);
        for (OperatorStateHandle.Mode mode : OperatorStateHandle.Mode.values()) {
            offsetCountsByMode.put(mode, new HashMap<>());
        }

        for (OperatorStateHandle handle : handlesOfSubtask) {
            handle.getStateNameToPartitionOffsets()
                    .forEach(
                            (stateName, metaInfo) -> {
                                stateInfoCounts.merge(stateName, 1, Integer::sum);
                                offsetCountsByMode
                                        .get(metaInfo.getDistributionMode())
                                        .merge(
                                                stateName,
                                                metaInfo.getOffsets().length,
                                                Integer::sum);
                            });
        }

        for (Map.Entry<OperatorStateHandle.Mode, Map<String, Integer>> byMode :
                offsetCountsByMode.entrySet()) {
            final int expectedOffsetCount;
            switch (byMode.getKey()) {
                case SPLIT_DISTRIBUTE:
                    // Rescaling up splits the state so that each piece goes to one task;
                    // when rescaling down to 1 or not rescaling, it is not re-distributed.
                    expectedOffsetCount = oldParallelism < newParallelism ? 1 : 2;
                    break;
                case UNION:
                    // UNION: all to all
                    expectedOffsetCount = 2;
                    break;
                default:
                    // BROADCAST: so all to all
                    expectedOffsetCount = 3;
                    break;
            }
            for (Integer offsetCount : byMode.getValue().values()) {
                Assert.assertEquals(expectedOffsetCount, (int) offsetCount);
            }
        }
    }
}
Also used : OperatorInstanceID(org.apache.flink.runtime.jobgraph.OperatorInstanceID) InflightDataRescalingDescriptorUtil.set(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.set) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) CoreMatchers.is(org.hamcrest.CoreMatchers.is) Arrays(java.util.Arrays) TestingDefaultExecutionGraphBuilder(org.apache.flink.runtime.executiongraph.TestingDefaultExecutionGraphBuilder) RANGE(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.RANGE) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SubtaskStateMapper(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper) Random(java.util.Random) RESCALING(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor.InflightDataGateOrPartitionRescalingDescriptor.MappingType.RESCALING) Collections.singletonList(java.util.Collections.singletonList) ARBITRARY(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.ARBITRARY) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) InflightDataGateOrPartitionRescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor.InflightDataGateOrPartitionRescalingDescriptor) StateHandleDummyUtil.createNewResultSubpartitionStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewResultSubpartitionStateHandle) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) EnumMap(java.util.EnumMap) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) StateHandleDummyUtil.createNewInputChannelStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewInputChannelStateHandle) Stream(java.util.stream.Stream) OperatorInstanceID(org.apache.flink.runtime.jobgraph.OperatorInstanceID) 
Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) InflightDataRescalingDescriptorUtil.rescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.rescalingDescriptor) IntStream(java.util.stream.IntStream) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) StateHandleDummyUtil.createNewKeyedStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewKeyedStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) Function(java.util.function.Function) InflightDataRescalingDescriptorUtil.array(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.array) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) JobException(org.apache.flink.runtime.JobException) InflightDataRescalingDescriptorUtil.to(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.to) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Matchers.empty(org.hamcrest.Matchers.empty) Collections.emptySet(java.util.Collections.emptySet) ExecutionGraphTestUtils(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils) ROUND_ROBIN(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.ROUND_ROBIN) Test(org.junit.Test) 
OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) InflightDataRescalingDescriptorUtil.mappings(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.mappings) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Assert(org.junit.Assert) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) HashMap(java.util.HashMap) Collections.singletonList(java.util.Collections.singletonList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) EnumMap(java.util.EnumMap)

Example 20 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in the Apache Flink project.

From the class StateAssignmentOperationTest, method testStateWithFullyFinishedOperators.

/**
 * Assigns state where one operator carries a {@code FullyFinishedOperatorState} and the other
 * carries regular state, then checks that only the tasks of the former are restored as
 * "deployed as finished".
 */
@Test
public void testStateWithFullyFinishedOperators() throws JobException, JobExecutionException {
    final List<OperatorID> ids = buildOperatorIds(2);
    final OperatorID finishedId = ids.get(0);
    final OperatorID unfinishedId = ids.get(1);

    final Map<OperatorID, OperatorState> operatorStates =
            buildOperatorStates(Collections.singletonList(unfinishedId), 3);
    // Register an operator state that is marked as finished for the first operator.
    final OperatorState finishedState = new FullyFinishedOperatorState(finishedId, 3, 256);
    operatorStates.put(finishedId, finishedState);

    final Map<OperatorID, ExecutionJobVertex> jobVertices =
            buildVertices(ids, 2, RANGE, ROUND_ROBIN);
    final StateAssignmentOperation assignment =
            new StateAssignmentOperation(
                    0, new HashSet<>(jobVertices.values()), operatorStates, false);
    assignment.assignStates();

    // Every task of the vertex holding only the finished operator is deployed as finished.
    final ExecutionJobVertex finishedVertex = jobVertices.get(finishedId);
    for (ExecutionVertex task : finishedVertex.getTaskVertices()) {
        final JobManagerTaskRestore restore = task.getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertTrue(restore.getTaskStateSnapshot().isTaskDeployedAsFinished());
    }

    // No task of the vertex without a finished operator is deployed as finished.
    final ExecutionJobVertex unfinishedVertex = jobVertices.get(unfinishedId);
    for (ExecutionVertex task : unfinishedVertex.getTaskVertices()) {
        final JobManagerTaskRestore restore = task.getCurrentExecutionAttempt().getTaskRestore();
        Assert.assertFalse(restore.getTaskStateSnapshot().isTaskDeployedAsFinished());
    }
}
Also used : ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)211 Test (org.junit.Test)132 HashMap (java.util.HashMap)46 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)44 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)41 JobID (org.apache.flink.api.common.JobID)38 Configuration (org.apache.flink.configuration.Configuration)30 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)28 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)28 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)24 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)23 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)21 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)21 ArrayList (java.util.ArrayList)20 HashSet (java.util.HashSet)20 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)19 OperatorStreamStateHandle (org.apache.flink.runtime.state.OperatorStreamStateHandle)19 MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend)19 IOException (java.io.IOException)18 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)18