Search in sources :

Example 11 with ResultPartitionWriter

use of org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testWaitingForFinalCheckpointNotTheFirsNotifiedComplete.

/**
 * Triggers checkpoints 1, 2 and 3 after the task has completed while the responder is set up to
 * complete only checkpoint 3, then asserts that checkpoint 3 is both the reported and the
 * notified-complete checkpoint and that every output partition holds four queued buffers.
 */
@Test
public void testWaitingForFinalCheckpointNotTheFirsNotifiedComplete() throws Exception {
    final ResultPartition[] partitions = new ResultPartition[2];
    try {
        for (int idx = 0; idx < partitions.length; idx++) {
            partitions[idx] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitions[idx].setup();
        }
        final CompletingCheckpointResponder responder = new CompletingCheckpointResponder();
        try (StreamTaskMailboxTestHarness<String> harness = createTestHarness(partitions, responder, false)) {
            // Only checkpoint 3 is configured to be completed by the responder.
            responder.completeCheckpoints(Collections.singletonList(3L));
            // Let the task run to completion before any checkpoint is triggered.
            harness.waitForTaskCompletion();

            // Checkpoint 1: once its trigger future completes, tell every partition that
            // all records of subpartition 0 have been processed.
            CompletableFuture<Boolean> checkpointOne = triggerCheckpoint(harness, 1);
            checkpointOne.thenAccept(unused -> {
                for (int idx = 0; idx < partitions.length; idx++) {
                    partitions[idx].onSubpartitionAllDataProcessed(0);
                }
            });
            harness.processAll();
            harness.getTaskStateManager().getWaitForReportLatch().await();

            // Checkpoint 2.
            triggerCheckpoint(harness, 2L);
            harness.processAll();
            harness.getTaskStateManager().getWaitForReportLatch().await();

            // Checkpoint 3.
            triggerCheckpoint(harness, 3L);
            harness.processAll();
            harness.finishProcessing();
            harness.getTaskStateManager().getWaitForReportLatch().await();

            assertEquals(3L, harness.getTaskStateManager().getReportedCheckpointId());
            assertEquals(3L, harness.getTaskStateManager().getNotifiedCompletedCheckpointId());
            // Expected per partition: 3 barriers plus 1 EndOfUserRecordsEvent.
            for (int idx = 0; idx < partitions.length; idx++) {
                assertEquals(4, partitions[idx].getNumberOfQueuedBuffers());
            }
        }
    } finally {
        // Entries may still be null if creation failed part-way through the setup loop.
        for (ResultPartitionWriter toClose : partitions) {
            if (toClose == null) {
                continue;
            }
            toClose.close();
        }
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) Test(org.junit.Test)

Example 12 with ResultPartitionWriter

use of org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint.

/**
 * Triggers a final checkpoint (id 6) followed by a stop-with-savepoint (id 7) after the task has
 * completed, using a responder that withholds every acknowledgement until the savepoint is
 * acknowledged. Asserts that both trigger futures are done, that the savepoint id is the reported
 * and notified-complete checkpoint, and that each output partition holds three queued buffers.
 */
@Test
public void testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint() throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        int finalCheckpointId = 6;
        int syncSavepointId = 7;
        CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

            @Override
            public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
                // Acknowledgements for any id other than the sync savepoint are dropped here;
                // the final checkpoint is only forwarded once the savepoint is acknowledged.
                if (syncSavepointId == checkpointId) {
                    // Complete the final checkpoint first; the task is expected to keep
                    // waiting for the sync savepoint to complete.
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                    try {
                        // Give some potential time for the task to finish before the
                        // savepoint is notified complete
                        Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                    } catch (InterruptedException e) {
                        // Restore the interrupt status so callers further up the stack can
                        // still observe the interruption after the exception is wrapped.
                        Thread.currentThread().interrupt();
                        throw new FlinkRuntimeException(e);
                    }
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
                }
            }
        };
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, checkpointResponder, false)) {
            // Tests triggering a checkpoint after all the inputs have received
            // EndOfPartition.
            testHarness.waitForTaskCompletion();
            // trigger the final checkpoint
            CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkpointFuture.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            // trigger the synchronous savepoint
            CompletableFuture<Boolean> savepointFuture = triggerStopWithSavepointDrain(testHarness, syncSavepointId);
            // The checkpoint 6 would be triggered successfully.
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            assertTrue(savepointFuture.isDone());
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
            // Each result partition should have emitted 2 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(3, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        // Entries may still be null if creation failed part-way through the setup loop.
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 13 with ResultPartitionWriter

use of org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testTriggeringCheckpointWithFinishedChannels.

/**
 * Shared test body that triggers checkpoints 2, 4 and 6 at three stages: while all input
 * channels are alive, after channel 0 has received EndOfData/EndOfPartition, and after the
 * remaining channels have received them as well. After each stage it asserts the reported
 * checkpoint id and that no input channel's resume action has run.
 *
 * @param checkpointOptions options passed to every triggered checkpoint; the harness is created
 *     with {@code true} as its third argument when these options are unaligned or timeoutable
 * @throws Exception if the harness, checkpoint triggering or partition handling fails
 */
private void testTriggeringCheckpointWithFinishedChannels(CheckpointOptions checkpointOptions) throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, new CompletingCheckpointResponder(), checkpointOptions.isUnalignedCheckpoint() || checkpointOptions.isTimeoutable())) {
            int numChannels = testHarness.inputGates[0].getInputGate().getNumberOfInputChannels();
            // Per-channel counter incremented by each channel's "on resumed" action.
            int[] resumedCount = new int[numChannels];
            for (int i = 0; i < numChannels; ++i) {
                TestInputChannel inputChannel = (TestInputChannel) testHarness.inputGates[0].getInputGate().getChannel(i);
                inputChannel.setActionOnResumed(() -> resumedCount[inputChannel.getChannelIndex()]++);
            }
            // Tests triggering checkpoint when all the inputs are alive.
            CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, 2, checkpointOptions);
            processMailTillCheckpointSucceeds(testHarness, checkpointFuture);
            assertEquals(2, testHarness.getTaskStateManager().getReportedCheckpointId());
            // NOTE(review): the three-element expectation implies the harness exposes exactly
            // three input channels — confirm against createTestHarness.
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Tests triggering checkpoint after some inputs have received EndOfPartition.
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 0);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 0);
            checkpointFuture = triggerCheckpoint(testHarness, 4, checkpointOptions);
            processMailTillCheckpointSucceeds(testHarness, checkpointFuture);
            assertEquals(4, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Tests triggering checkpoint after all the inputs have received
            // EndOfPartition.
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 1);
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 2);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 1);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 2);
            checkpointFuture = triggerCheckpoint(testHarness, 6, checkpointOptions);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkpointFuture.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            // The checkpoint 6 would be triggered successfully.
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(6, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Each result partition should have emitted 3 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(4, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        // Entries may still be null if creation failed part-way through the setup loop.
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition)

Example 14 with ResultPartitionWriter

use of org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testNotWaitingForAllRecordsProcessedIfCheckpointNotEnabled.

/**
 * Builds a one-input task with checkpointing disabled, ends its input, and asserts that each of
 * the additional output partitions has zero queued buffers afterwards.
 */
@Test
public void testNotWaitingForAllRecordsProcessedIfCheckpointNotEnabled() throws Exception {
    final ResultPartitionWriter[] outputs = new ResultPartitionWriter[2];
    try {
        for (int idx = 0; idx < outputs.length; idx++) {
            outputs[idx] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            outputs[idx].setup();
        }
        try (StreamTaskMailboxTestHarness<String> harness =
                new StreamTaskMailboxTestHarnessBuilder<>(OneInputStreamTask::new, STRING_TYPE_INFO)
                        .modifyStreamConfig(streamConfig -> streamConfig.setCheckpointingEnabled(false))
                        .addInput(STRING_TYPE_INFO)
                        .addAdditionalOutput(outputs)
                        .setupOperatorChain(new EmptyOperator())
                        .finishForSingletonOperatorChain(StringSerializer.INSTANCE)
                        .build()) {
            harness.endInput();
            // With checkpointing off, no EndOfUserRecordsEvent is expected in the partitions.
            for (int idx = 0; idx < outputs.length; idx++) {
                PipelinedResultPartition pipelined = (PipelinedResultPartition) outputs[idx];
                assertEquals(0, pipelined.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        // Entries may still be null if creation failed part-way through the setup loop.
        for (ResultPartitionWriter toClose : outputs) {
            if (toClose == null) {
                continue;
            }
            toClose.close();
        }
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) Deadline(org.apache.flink.api.common.time.Deadline) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ListState(org.apache.flink.api.common.state.ListState) Future(java.util.concurrent.Future) CheckpointStorageLocationReference.getDefault(org.apache.flink.runtime.state.CheckpointStorageLocationReference.getDefault) Duration(java.time.Duration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) Collectors(java.util.stream.Collectors) StopMode(org.apache.flink.runtime.io.network.api.StopMode) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) 
CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) STRING_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Nullable(javax.annotation.Nullable) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) Test(org.junit.Test)

Example 15 with ResultPartitionWriter

use of org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter in project flink by apache.

the class NetworkEnvironment method registerTask.

// --------------------------------------------------------------------------------------------
//  Task operations
// --------------------------------------------------------------------------------------------
/**
 * Registers a task's produced partitions, writers and input gates with this environment.
 *
 * <p>For every produced partition a buffer pool is created and registered with the partition,
 * the partition is registered with the result partition manager, and the matching writer is
 * registered with the task event dispatcher. For every input gate a buffer pool is created and
 * set on the gate. All of this happens while holding {@code lock}.
 *
 * @param task the task whose produced partitions, writers and input gates are registered
 * @throws IOException if setting up a partition or gate fails; non-IO failures are wrapped
 * @throws IllegalStateException if the writer and partition counts differ, or if the
 *     environment has already been shut down
 */
public void registerTask(Task task) throws IOException {
    final ResultPartition[] partitions = task.getProducedPartitions();
    final ResultPartitionWriter[] partitionWriters = task.getAllWriters();
    if (partitionWriters.length != partitions.length) {
        throw new IllegalStateException("Unequal number of writers and partitions.");
    }
    synchronized (lock) {
        if (isShutdown) {
            throw new IllegalStateException("NetworkEnvironment is shut down");
        }
        for (int idx = 0; idx < partitions.length; idx++) {
            final ResultPartition partition = partitions[idx];
            final ResultPartitionWriter partitionWriter = partitionWriters[idx];
            // Pool backing this partition; destroyed again if any setup step fails.
            BufferPool partitionPool = null;
            try {
                // Bounded partition types get a capped pool, all others are uncapped.
                int segmentLimit = Integer.MAX_VALUE;
                if (partition.getPartitionType().isBounded()) {
                    segmentLimit = partition.getNumberOfSubpartitions() * networkBuffersPerChannel + extraNetworkBuffersPerGate;
                }
                partitionPool = networkBufferPool.createBufferPool(partition.getNumberOfSubpartitions(), segmentLimit);
                partition.registerBufferPool(partitionPool);
                resultPartitionManager.registerResultPartition(partition);
            } catch (Throwable failure) {
                if (partitionPool != null) {
                    partitionPool.lazyDestroy();
                }
                // Propagate IOExceptions as-is; wrap everything else, keeping the cause.
                throw failure instanceof IOException
                        ? (IOException) failure
                        : new IOException(failure.getMessage(), failure);
            }
            // Route incoming task events for this partition to its writer.
            taskEventDispatcher.registerWriterForIncomingTaskEvents(partitionWriter.getPartitionId(), partitionWriter);
        }
        // Give every input gate its own buffer pool.
        final SingleInputGate[] gates = task.getAllInputGates();
        for (int idx = 0; idx < gates.length; idx++) {
            final SingleInputGate inputGate = gates[idx];
            BufferPool gatePool = null;
            try {
                // Same sizing rule as for partitions, keyed on the consumed partition type.
                int segmentLimit = Integer.MAX_VALUE;
                if (inputGate.getConsumedPartitionType().isBounded()) {
                    segmentLimit = inputGate.getNumberOfInputChannels() * networkBuffersPerChannel + extraNetworkBuffersPerGate;
                }
                gatePool = networkBufferPool.createBufferPool(inputGate.getNumberOfInputChannels(), segmentLimit);
                inputGate.setBufferPool(gatePool);
            } catch (Throwable failure) {
                if (gatePool != null) {
                    gatePool.lazyDestroy();
                }
                throw failure instanceof IOException
                        ? (IOException) failure
                        : new IOException(failure.getMessage(), failure);
            }
        }
    }
}
Also used : BufferPool(org.apache.flink.runtime.io.network.buffer.BufferPool) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) IOException(java.io.IOException) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition)

Aggregations

ResultPartitionWriter (org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter)37 ResultPartition (org.apache.flink.runtime.io.network.partition.ResultPartition)12 JobID (org.apache.flink.api.common.JobID)11 IOException (java.io.IOException)10 Test (org.junit.Test)10 CompletingCheckpointResponder (org.apache.flink.streaming.util.CompletingCheckpointResponder)8 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)8 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)7 EndOfData (org.apache.flink.runtime.io.network.api.EndOfData)7 CompletableFuture (java.util.concurrent.CompletableFuture)6 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)6 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)6 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)6 ArrayList (java.util.ArrayList)5 Future (java.util.concurrent.Future)5 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)5 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)5 SavepointType (org.apache.flink.runtime.checkpoint.SavepointType)5 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)5 StopMode (org.apache.flink.runtime.io.network.api.StopMode)5