Search in sources :

Example 26 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class RestartPipelinedRegionFailoverStrategyTest method testRegionFailoverForVariousResultPartitionAvailabilityCombinations.

/**
 * Checks which vertices the region failover strategy restarts under every combination of
 * availability of the two result partitions consumed by v3.
 *
 * <pre>
 *     (v1) --rp1--\
 *                 (v3)
 *     (v2) --rp2--/
 *
 *             ^
 *             |
 *         (blocking)
 * </pre>
 *
 * rp1 and rp2 are the result partitions produced by v1 and v2. Both edges are blocking, so each
 * vertex forms a region of its own.
 */
@Test
public void testRegionFailoverForVariousResultPartitionAvailabilityCombinations() throws Exception {
    final TestingSchedulingTopology testingTopology = new TestingSchedulingTopology();
    final TestingSchedulingExecutionVertex v1 = testingTopology.newExecutionVertex(ExecutionState.FINISHED);
    final TestingSchedulingExecutionVertex v2 = testingTopology.newExecutionVertex(ExecutionState.FINISHED);
    final TestingSchedulingExecutionVertex v3 = testingTopology.newExecutionVertex(ExecutionState.RUNNING);
    testingTopology.connect(v1, v3, ResultPartitionType.BLOCKING);
    testingTopology.connect(v2, v3, ResultPartitionType.BLOCKING);

    final TestResultPartitionAvailabilityChecker partitionChecker = new TestResultPartitionAvailabilityChecker();
    final RestartPipelinedRegionFailoverStrategy failoverStrategy =
            new RestartPipelinedRegionFailoverStrategy(testingTopology, partitionChecker);

    // Ids of the first result partition produced by each upstream vertex.
    final IntermediateResultPartitionID rp1 = v1.getProducedResults().iterator().next().getId();
    final IntermediateResultPartitionID rp2 = v2.getProducedResults().iterator().next().getId();

    // =================================================
    // Case 1: rp1 available, rp2 available
    // =================================================
    partitionChecker.failedPartitions.clear();
    verifyThatFailedExecution(failoverStrategy, v1).restarts(v1, v3);
    verifyThatFailedExecution(failoverStrategy, v2).restarts(v2, v3);
    verifyThatFailedExecution(failoverStrategy, v3).restarts(v3);

    // =================================================
    // Case 2: rp1 unavailable, rp2 available
    // =================================================
    partitionChecker.failedPartitions.clear();
    partitionChecker.markResultPartitionFailed(rp1);
    verifyThatFailedExecution(failoverStrategy, v1).restarts(v1, v3);
    verifyThatFailedExecution(failoverStrategy, v2).restarts(v1, v2, v3);
    verifyThatFailedExecution(failoverStrategy, v3).restarts(v1, v3);

    // =================================================
    // Case 3: rp1 available, rp2 unavailable
    // =================================================
    partitionChecker.failedPartitions.clear();
    partitionChecker.markResultPartitionFailed(rp2);
    verifyThatFailedExecution(failoverStrategy, v1).restarts(v1, v2, v3);
    verifyThatFailedExecution(failoverStrategy, v2).restarts(v2, v3);
    verifyThatFailedExecution(failoverStrategy, v3).restarts(v2, v3);

    // =================================================
    // Case 4: rp1 unavailable, rp2 unavailable
    // =================================================
    partitionChecker.failedPartitions.clear();
    partitionChecker.markResultPartitionFailed(rp1);
    partitionChecker.markResultPartitionFailed(rp2);
    verifyThatFailedExecution(failoverStrategy, v1).restarts(v1, v2, v3);
    verifyThatFailedExecution(failoverStrategy, v2).restarts(v1, v2, v3);
    verifyThatFailedExecution(failoverStrategy, v3).restarts(v1, v2, v3);
}
Also used : TestingSchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex) TestingSchedulingTopology(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingTopology) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 27 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class SingleInputGateTest method testSingleInputGateWithSubpartitionIndexRange.

/**
 * Builds a {@link SingleInputGate} over three partitions with a {@code SubpartitionIndexRange}
 * constructed as (0, 1) and checks that it exposes six channels (two per partition), that each
 * channel consumes the expected subpartition index, and that the channel types evolve as
 * expected through recovery, partition request and channel update.
 */
@Test
public void testSingleInputGateWithSubpartitionIndexRange() throws IOException, InterruptedException {
    final IntermediateResultPartitionID[] partitionIds = {
        new IntermediateResultPartitionID(),
        new IntermediateResultPartitionID(),
        new IntermediateResultPartitionID()
    };
    final SubpartitionIndexRange indexRange = new SubpartitionIndexRange(0, 1);
    final NettyShuffleEnvironment netEnv = new NettyShuffleEnvironmentBuilder().build();
    final ResourceID localLocation = ResourceID.generate();
    final TestingConnectionManager connectionManager = new TestingConnectionManager();
    final TestingResultPartitionManager partitionManager =
            new TestingResultPartitionManager(new NoOpResultSubpartitionView());
    final SingleInputGate gate =
            createSingleInputGate(
                    partitionIds,
                    ResultPartitionType.BLOCKING,
                    indexRange,
                    netEnv,
                    localLocation,
                    connectionManager,
                    partitionManager);

    // Channels that hold channel state get a no-op state writer.
    for (InputChannel channel : gate.getInputChannels().values()) {
        if (!(channel instanceof ChannelStateHolder)) {
            continue;
        }
        ((ChannelStateHolder) channel).setChannelStateWriter(ChannelStateWriter.NO_OP);
    }

    // Naming follows the channel types asserted below: partition 0 -> local,
    // partition 1 -> remote, partition 2 -> unknown (until it is updated at the end).
    final SubpartitionInfo local0 = createSubpartitionInfo(partitionIds[0], 0);
    final SubpartitionInfo local1 = createSubpartitionInfo(partitionIds[0], 1);
    final SubpartitionInfo remote0 = createSubpartitionInfo(partitionIds[1], 0);
    final SubpartitionInfo remote1 = createSubpartitionInfo(partitionIds[1], 1);
    final SubpartitionInfo unknown0 = createSubpartitionInfo(partitionIds[2], 0);
    final SubpartitionInfo unknown1 = createSubpartitionInfo(partitionIds[2], 1);
    final SubpartitionInfo[] allInfos = {local0, local1, remote0, remote1, unknown0, unknown1};

    assertThat(gate.getInputChannels().size(), is(6));
    // The infos alternate between subpartition 0 and 1, hence the expected index is i % 2.
    for (int i = 0; i < allInfos.length; i++) {
        assertThat(gate.getInputChannels().get(allInfos[i]).getConsumedSubpartitionIndex(), is(i % 2));
    }
    assertChannelsType(gate, LocalRecoveredInputChannel.class, Arrays.asList(local0, local1));
    assertChannelsType(gate, RemoteRecoveredInputChannel.class, Arrays.asList(remote0, remote1));
    assertChannelsType(gate, UnknownInputChannel.class, Arrays.asList(unknown0, unknown1));

    // --- setup ---
    gate.setup();
    assertNotNull(gate.getBufferPool());
    assertEquals(1, gate.getBufferPool().getNumberOfRequiredMemorySegments());

    // Drain the gate until the recovered state has been fully consumed.
    gate.finishReadRecoveredState();
    while (!gate.getStateConsumedFuture().isDone()) {
        gate.pollNext();
    }

    // --- request partitions ---
    gate.requestPartitions();
    gate.pollNext();
    assertChannelsType(gate, LocalInputChannel.class, Arrays.asList(local0, local1));
    assertChannelsType(gate, RemoteInputChannel.class, Arrays.asList(remote0, remote1));
    assertChannelsType(gate, UnknownInputChannel.class, Arrays.asList(unknown0, unknown1));
    for (InputChannel inputChannel : gate.getInputChannels().values()) {
        if (inputChannel instanceof RemoteInputChannel) {
            final RemoteInputChannel remoteChannel = (RemoteInputChannel) inputChannel;
            assertNotNull(remoteChannel.getPartitionRequestClient());
            assertEquals(2, remoteChannel.getInitialCredit());
        } else if (inputChannel instanceof LocalInputChannel) {
            final LocalInputChannel localChannel = (LocalInputChannel) inputChannel;
            assertNotNull(localChannel.getSubpartitionView());
        }
    }

    // --- update channels: the third partition is now known and located locally ---
    gate.updateInputChannel(localLocation, createRemoteWithIdAndLocation(partitionIds[2], localLocation));
    assertChannelsType(gate, LocalInputChannel.class, Arrays.asList(local0, local1));
    assertChannelsType(gate, RemoteInputChannel.class, Arrays.asList(remote0, remote1));
    assertChannelsType(gate, LocalInputChannel.class, Arrays.asList(unknown0, unknown1));
}
Also used : TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) SubpartitionInfo(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.SubpartitionInfo) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) ChannelStateHolder(org.apache.flink.runtime.io.network.partition.ChannelStateHolder) InputChannelTestUtils.createRemoteInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createRemoteInputChannel) NoOpResultSubpartitionView(org.apache.flink.runtime.io.network.partition.NoOpResultSubpartitionView) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) InputChannelTestUtils.createRemoteInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createRemoteInputChannel) InputChannelTestUtils.createLocalInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel) InputChannelTestUtils.createLocalInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel) SubpartitionIndexRange(org.apache.flink.runtime.deployment.SubpartitionIndexRange) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 28 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class TaskIOMetricGroupTest method testNumBytesProducedOfPartitionsMetrics.

/**
 * Registers two "bytes produced" counters for two distinct result partitions and checks that the
 * IO metrics snapshot reports exactly those two partitions with the counters' values.
 */
@Test
public void testNumBytesProducedOfPartitionsMetrics() {
    final TaskIOMetricGroup ioMetricGroup =
            UnregisteredMetricGroups.createUnregisteredTaskMetricGroup().getIOMetricGroup();

    // Two counters pre-loaded with distinct values so they can be told apart in the snapshot.
    final Counter firstCounter = new SimpleCounter();
    firstCounter.inc(32L);
    final Counter secondCounter = new SimpleCounter();
    secondCounter.inc(64L);

    final IntermediateResultPartitionID firstPartition = new IntermediateResultPartitionID();
    final IntermediateResultPartitionID secondPartition = new IntermediateResultPartitionID();
    ioMetricGroup.registerNumBytesProducedCounterForPartition(firstPartition, firstCounter);
    ioMetricGroup.registerNumBytesProducedCounterForPartition(secondPartition, secondCounter);

    final Map<IntermediateResultPartitionID, Long> bytesPerPartition =
            ioMetricGroup.createSnapshot().getNumBytesProducedOfPartitions();

    assertEquals(2, bytesPerPartition.size());
    assertEquals(32L, bytesPerPartition.get(firstPartition).longValue());
    assertEquals(64L, bytesPerPartition.get(secondPartition).longValue());
}
Also used : SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 29 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class NettyShuffleUtilsTest method createInputGate.

/**
 * Creates a single input gate in the given shuffle environment, backed by
 * {@code numInputChannels} freshly generated remote shuffle descriptors.
 *
 * @param network shuffle environment used to create the gate
 * @param resultPartitionType partition type of the consumed result
 * @param numInputChannels number of shuffle descriptors to generate
 * @return the first gate returned by {@code network.createInputGates(...)}
 * @throws IOException declared for the gate creation call
 */
private SingleInputGate createInputGate(NettyShuffleEnvironment network, ResultPartitionType resultPartitionType, int numInputChannels) throws IOException {
    final ShuffleDescriptor[] descriptors = new NettyShuffleDescriptor[numInputChannels];
    for (int index = 0; index < descriptors.length; index++) {
        // Every descriptor gets its own partition id and producer location.
        descriptors[index] =
                createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
    }

    final InputGateDeploymentDescriptor gateDescriptor =
            new InputGateDeploymentDescriptor(new IntermediateDataSetID(), resultPartitionType, 0, descriptors);

    // Only one deployment descriptor is passed in, so the first gate is the one we want.
    return network
            .createInputGates(
                    network.createShuffleIOOwnerContext("", new ExecutionAttemptID(), new UnregisteredMetricsGroup()),
                    SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER,
                    Collections.singletonList(gateDescriptor))
            .iterator()
            .next();
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)

Example 30 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class DefaultSchedulingPipelinedRegionTest method returnsIncidentBlockingPartitions.

/**
 * Verifies the blocking consumed partitions reported for each pipelined region of the job graph
 * below. The edges a -> b and c -> e are blocking; all other edges are pipelined.
 *
 * <pre>
 *          c
 *        /  X
 * a -+- b   e
 *       \  /
 *        d
 * </pre>
 *
 * <p>Pipelined regions: {a}, {b, c, d, e}
 */
@Test
public void returnsIncidentBlockingPartitions() throws Exception {
    final JobVertex vertexA = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex vertexB = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex vertexC = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex vertexD = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex vertexE = ExecutionGraphTestUtils.createNoOpVertex(1);

    vertexB.connectNewDataSetAsInput(vertexA, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    vertexC.connectNewDataSetAsInput(vertexB, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    vertexD.connectNewDataSetAsInput(vertexB, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    vertexE.connectNewDataSetAsInput(vertexC, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    vertexE.connectNewDataSetAsInput(vertexD, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);

    final DefaultExecutionGraph executionGraph =
            ExecutionGraphTestUtils.createSimpleTestGraph(vertexA, vertexB, vertexC, vertexD, vertexE);
    final DefaultExecutionTopology executionTopology = DefaultExecutionTopology.fromExecutionGraph(executionGraph);

    final DefaultSchedulingPipelinedRegion regionOfA =
            executionTopology.getPipelinedRegionOfVertex(new ExecutionVertexID(vertexA.getID(), 0));
    final DefaultSchedulingPipelinedRegion regionOfE =
            executionTopology.getPipelinedRegionOfVertex(new ExecutionVertexID(vertexE.getID(), 0));

    // The single partition consumed by b's first subtask is the expected external input.
    final DefaultExecutionVertex executionVertexB0 =
            executionTopology.getVertex(new ExecutionVertexID(vertexB.getID(), 0));
    final IntermediateResultPartitionID expectedPartition =
            Iterables.getOnlyElement(executionVertexB0.getConsumedResults()).getId();

    // Collect the blocking partitions consumed by e's region whose producer lies outside it.
    final Set<IntermediateResultPartitionID> externalBlockingInputs = new HashSet<>();
    for (ConsumedPartitionGroup group : regionOfE.getAllBlockingConsumedPartitionGroups()) {
        for (IntermediateResultPartitionID partitionId : group) {
            if (regionOfE.contains(executionTopology.getResultPartition(partitionId).getProducer().getId())) {
                // Produced inside the region itself (e.g. the blocking edge c -> e): skip.
                continue;
            }
            externalBlockingInputs.add(partitionId);
        }
    }

    assertThat(regionOfA.getAllBlockingConsumedPartitionGroups().iterator().hasNext(), is(false));
    assertThat(externalBlockingInputs, contains(expectedPartition));
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) DefaultExecutionGraph(org.apache.flink.runtime.executiongraph.DefaultExecutionGraph) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)66 Test (org.junit.Test)41 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)18 ConsumedPartitionGroup (org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup)14 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)13 JobID (org.apache.flink.api.common.JobID)12 ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID)12 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 ArrayList (java.util.ArrayList)10 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)10 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)10 InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor)9 ResultPartitionDeploymentDescriptor (org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor)9 CompletableFuture (java.util.concurrent.CompletableFuture)8 Configuration (org.apache.flink.configuration.Configuration)8 ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor)8 IOException (java.io.IOException)7 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)7 Collection (java.util.Collection)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6