Example 6 with SourceOperatorFactory

use of org.apache.flink.streaming.api.operators.SourceOperatorFactory in project flink by apache.

the class SourceOperatorStreamTaskTest method testTriggeringStopWithSavepointWithDrain.

@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    // An unbounded mock source wrapped in a SourceOperatorFactory.
    SourceOperatorFactory<Integer> sourceOperatorFactory =
            new SourceOperatorFactory<>(
                    new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 2),
                    WatermarkStrategy.noWatermarks());
    CompletableFuture<Boolean> checkpointCompleted = new CompletableFuture<>();
    // Signals when the task has acknowledged the savepoint checkpoint.
    CheckpointResponder checkpointResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(
                JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId,
                CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(
                    jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            checkpointCompleted.complete(null);
        }
    };
    try (StreamTaskMailboxTestHarness<Integer> testHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    .setupOutputForSingletonOperatorChain(sourceOperatorFactory)
                    .setCheckpointResponder(checkpointResponder)
                    .build()) {
        // Trigger a terminal savepoint, i.e. stop-with-savepoint --drain.
        CompletableFuture<Boolean> triggerResult =
                testHarness.streamTask.triggerCheckpointAsync(
                        new CheckpointMetaData(2, 2),
                        CheckpointOptions.alignedNoTimeout(
                                SavepointType.terminate(SavepointFormatType.CANONICAL),
                                CheckpointStorageLocationReference.getDefault()));
        // Once acknowledged, send the matching notifyCheckpointComplete for checkpoint id 2.
        checkpointCompleted.whenComplete(
                (ignored, exception) -> testHarness.streamTask.notifyCheckpointCompleteAsync(2));
        testHarness.waitForTaskCompletion();
        testHarness.finishProcessing();
        assertTrue(triggerResult.isDone());
        assertTrue(triggerResult.get());
        assertTrue(checkpointCompleted.isDone());
    }
}
Also used : MockSource(org.apache.flink.api.connector.source.mocks.MockSource) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
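The SnapshotType handed to CheckpointOptions above is what makes this a draining stop-with-savepoint. A rough, hedged sketch of the related flavours follows; only terminate(...) and SavepointFormatType.CANONICAL appear in the example above, while the suspend(...) and savepoint(...) factories and the NATIVE format are assumptions that may differ by Flink version:

SavepointType drain = SavepointType.terminate(SavepointFormatType.CANONICAL); // stop-with-savepoint --drain, as in the test
SavepointType suspend = SavepointType.suspend(SavepointFormatType.CANONICAL); // assumed: stop-with-savepoint without draining
SavepointType plain = SavepointType.savepoint(SavepointFormatType.NATIVE); // assumed: plain savepoint, the job keeps running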

Example 7 with SourceOperatorFactory

use of org.apache.flink.streaming.api.operators.SourceOperatorFactory in project flink by apache.

the class SourceOperatorStreamTaskTest method testSkipExecutionIfFinishedOnRestore.

@Test
public void testSkipExecutionIfFinishedOnRestore() throws Exception {
    TaskStateSnapshot taskStateSnapshot = TaskStateSnapshot.FINISHED_ON_RESTORE;
    LifeCycleMonitorSource testingSource =
            new LifeCycleMonitorSource(Boundedness.CONTINUOUS_UNBOUNDED, 10);
    SourceOperatorFactory<Integer> sourceOperatorFactory =
            new SourceOperatorFactory<>(testingSource, WatermarkStrategy.noWatermarks());
    List<Object> output = new ArrayList<>();
    try (StreamTaskMailboxTestHarness<Integer> testHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    // Restore the task as already finished.
                    .setTaskStateSnapshot(1, taskStateSnapshot)
                    .addAdditionalOutput(
                            new RecordOrEventCollectingResultPartitionWriter<StreamElement>(
                                    output,
                                    new StreamElementSerializer<>(IntSerializer.INSTANCE)) {

                                @Override
                                public void notifyEndOfData(StopMode mode) throws IOException {
                                    broadcastEvent(new EndOfData(mode), false);
                                }
                            })
                    .setupOperatorChain(sourceOperatorFactory)
                    .chain(new TestFinishedOnRestoreStreamOperator(), StringSerializer.INSTANCE)
                    .finish()
                    .build()) {
        testHarness.getStreamTask().invoke();
        testHarness.processAll();
        // A task restored as finished only emits MAX_WATERMARK and EndOfData.
        assertThat(output, contains(Watermark.MAX_WATERMARK, new EndOfData(StopMode.DRAIN)));
        LifeCycleMonitorSourceReader sourceReader =
                (LifeCycleMonitorSourceReader)
                        ((SourceOperator<?, ?>) testHarness.getStreamTask().getMainOperator())
                                .getSourceReader();
        // The source reader must not have gone through any life-cycle phase.
        sourceReader.getLifeCycleMonitor().assertCallTimes(0, LifeCyclePhase.values());
    }
}
Also used : ArrayList(java.util.ArrayList) StopMode(org.apache.flink.runtime.io.network.api.StopMode) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) IOException(java.io.IOException) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) Test(org.junit.Test)

Example 8 with SourceOperatorFactory

use of org.apache.flink.streaming.api.operators.SourceOperatorFactory in project flink by apache.

the class StreamingJobGraphGeneratorTest method testCoordinatedOperator.

@Test
public void testCoordinatedOperator() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source =
            env.fromSource(
                    new MockSource(Boundedness.BOUNDED, 1),
                    WatermarkStrategy.noWatermarks(),
                    "TestSource");
    source.addSink(new DiscardingSink<>());
    StreamGraph streamGraph = env.getStreamGraph();
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);
    // There should be only one job vertex.
    assertEquals(1, jobGraph.getNumberOfVertices());
    JobVertex jobVertex = jobGraph.getVerticesAsArray()[0];
    List<SerializedValue<OperatorCoordinator.Provider>> coordinatorProviders = jobVertex.getOperatorCoordinators();
    // There should be only one coordinator provider.
    assertEquals(1, coordinatorProviders.size());
    // The invokable class should be SourceOperatorStreamTask.
    final ClassLoader classLoader = getClass().getClassLoader();
    assertEquals(SourceOperatorStreamTask.class, jobVertex.getInvokableClass(classLoader));
    StreamOperatorFactory operatorFactory =
            new StreamConfig(jobVertex.getConfiguration()).getStreamOperatorFactory(classLoader);
    assertTrue(operatorFactory instanceof SourceOperatorFactory);
}
Also used : MockSource(org.apache.flink.api.connector.source.mocks.MockSource) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) OneInputStreamOperatorFactory(org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) SerializedValue(org.apache.flink.util.SerializedValue) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
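Outside a test harness, the same SourceOperatorFactory is created whenever a new-style Source is attached through StreamExecutionEnvironment.fromSource. A minimal, self-contained sketch of that path follows; the class name is illustrative, and MockSource comes from the Flink test utilities used above, so the corresponding test artifact would have to be on the classpath:

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.mocks.MockSource;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
import org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator;

public class SourceOperatorFactorySketch {

    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // fromSource(...) is the public entry point that ends up wrapping the Source
        // in a SourceOperatorFactory when the job graph is generated.
        env.fromSource(
                        new MockSource(Boundedness.BOUNDED, 1),
                        WatermarkStrategy.noWatermarks(),
                        "SketchSource")
                .addSink(new DiscardingSink<>());
        JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
        // As in testCoordinatedOperator, the single chained vertex is executed by
        // SourceOperatorStreamTask and configured with a SourceOperatorFactory.
        JobVertex sourceVertex = jobGraph.getVerticesAsArray()[0];
        System.out.println(
                sourceVertex.getInvokableClass(
                        SourceOperatorFactorySketch.class.getClassLoader()));
    }
}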

Example 9 with SourceOperatorFactory

use of org.apache.flink.streaming.api.operators.SourceOperatorFactory in project flink by apache.

the class StreamingJobGraphGenerator method buildChainedInputsAndGetHeadInputs.

private Map<Integer, OperatorChainInfo> buildChainedInputsAndGetHeadInputs(final Map<Integer, byte[]> hashes, final List<Map<Integer, byte[]>> legacyHashes) {
    final Map<Integer, ChainedSourceInfo> chainedSources = new HashMap<>();
    final Map<Integer, OperatorChainInfo> chainEntryPoints = new HashMap<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        final StreamNode sourceNode = streamGraph.getStreamNode(sourceNodeId);
        if (sourceNode.getOperatorFactory() instanceof SourceOperatorFactory && sourceNode.getOutEdges().size() == 1) {
            // as long as only NAry ops support this chaining, we need to skip the other parts
            final StreamEdge sourceOutEdge = sourceNode.getOutEdges().get(0);
            final StreamNode target = streamGraph.getStreamNode(sourceOutEdge.getTargetId());
            final ChainingStrategy targetChainingStrategy = target.getOperatorFactory().getChainingStrategy();
            if (targetChainingStrategy == ChainingStrategy.HEAD_WITH_SOURCES && isChainableInput(sourceOutEdge, streamGraph)) {
                final OperatorID opId = new OperatorID(hashes.get(sourceNodeId));
                final StreamConfig.SourceInputConfig inputConfig = new StreamConfig.SourceInputConfig(sourceOutEdge);
                final StreamConfig operatorConfig = new StreamConfig(new Configuration());
                setVertexConfig(sourceNodeId, operatorConfig, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap());
                // sources are always first
                operatorConfig.setChainIndex(0);
                operatorConfig.setOperatorID(opId);
                operatorConfig.setOperatorName(sourceNode.getOperatorName());
                chainedSources.put(sourceNodeId, new ChainedSourceInfo(operatorConfig, inputConfig));
                final SourceOperatorFactory<?> sourceOpFact = (SourceOperatorFactory<?>) sourceNode.getOperatorFactory();
                final OperatorCoordinator.Provider coord = sourceOpFact.getCoordinatorProvider(sourceNode.getOperatorName(), opId);
                final OperatorChainInfo chainInfo =
                        chainEntryPoints.computeIfAbsent(
                                sourceOutEdge.getTargetId(),
                                (k) -> new OperatorChainInfo(
                                        sourceOutEdge.getTargetId(),
                                        hashes,
                                        legacyHashes,
                                        chainedSources,
                                        streamGraph));
                chainInfo.addCoordinatorProvider(coord);
                continue;
            }
        }
        chainEntryPoints.put(sourceNodeId, new OperatorChainInfo(sourceNodeId, hashes, legacyHashes, chainedSources, streamGraph));
    }
    return chainEntryPoints;
}
Also used : CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) Configuration(org.apache.flink.configuration.Configuration) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)
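Condensed, the branch above turns a source node into a chained source only when three conditions hold at once. The helper below is a hedged restatement for readability, written as if it sat next to buildChainedInputsAndGetHeadInputs in the same class (it reuses the streamGraph field and the existing isChainableInput check); it is not part of the Flink sources.

// Illustrative restatement of the chained-source condition checked above.
private boolean isChainedSourceCandidate(StreamNode sourceNode) {
    // 1) The source must use the new Source API, i.e. a SourceOperatorFactory,
    //    and feed exactly one downstream edge.
    if (!(sourceNode.getOperatorFactory() instanceof SourceOperatorFactory)
            || sourceNode.getOutEdges().size() != 1) {
        return false;
    }
    final StreamEdge sourceOutEdge = sourceNode.getOutEdges().get(0);
    final StreamNode target = streamGraph.getStreamNode(sourceOutEdge.getTargetId());
    // 2) The downstream operator must opt in via HEAD_WITH_SOURCES, and
    // 3) the connecting edge itself must be chainable.
    return target.getOperatorFactory().getChainingStrategy() == ChainingStrategy.HEAD_WITH_SOURCES
            && isChainableInput(sourceOutEdge, streamGraph);
}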

Example 10 with SourceOperatorFactory

use of org.apache.flink.streaming.api.operators.SourceOperatorFactory in project flink by apache.

the class MultipleInputStreamTaskChainedSourcesCheckpointingTest method testStopWithSavepointDrainWaitsForSourcesFinish.

/**
 * In this scenario:
 *
 * <ul>
 *   <li>Network inputs are processed until CheckpointBarriers for synchronous savepoint are
 *       processed.
 *   <li>RPC for stop-with-savepoint comes for sources
 *   <li>Sources keep being invoked until they return END_OF_DATA
 *   <li>Synchronous savepoint is triggered
 * </ul>
 */
@Test
public void testStopWithSavepointDrainWaitsForSourcesFinish() throws Exception {
    try (StreamTaskMailboxTestHarness<String> testHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            MultipleInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO)
                    .setCollectNetworkEvents()
                    .modifyExecutionConfig(applyObjectReuse(objectReuse))
                    .modifyStreamConfig(config -> config.setCheckpointingEnabled(true))
                    .addInput(BasicTypeInfo.STRING_TYPE_INFO)
                    .addSourceInput(
                            new SourceOperatorFactory<>(
                                    new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 1),
                                    WatermarkStrategy.noWatermarks()),
                            BasicTypeInfo.INT_TYPE_INFO)
                    .addSourceInput(
                            new SourceOperatorFactory<>(
                                    new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 1),
                                    WatermarkStrategy.noWatermarks()),
                            BasicTypeInfo.INT_TYPE_INFO)
                    .addInput(BasicTypeInfo.DOUBLE_TYPE_INFO)
                    .setupOutputForSingletonOperatorChain(
                            new MapToStringMultipleInputOperatorFactory(4, true))
                    .build()) {
        testHarness.setAutoProcess(false);
        ArrayDeque<Object> expectedOutput = new ArrayDeque<>();
        CheckpointBarrier barrier = createStopWithSavepointDrainBarrier();
        // Finish both network inputs and deliver the savepoint barrier on each of them.
        testHarness.processElement(new StreamRecord<>("44", TimestampAssigner.NO_TIMESTAMP), 0);
        testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0);
        testHarness.processEvent(barrier, 0);
        testHarness.processElement(new StreamRecord<>(47d, TimestampAssigner.NO_TIMESTAMP), 1);
        testHarness.processEvent(new EndOfData(StopMode.DRAIN), 1);
        testHarness.processEvent(barrier, 1);
        // The chained sources still have records to emit before the savepoint can complete.
        addSourceRecords(testHarness, 1, Boundedness.CONTINUOUS_UNBOUNDED, 1, 2);
        addSourceRecords(testHarness, 2, Boundedness.CONTINUOUS_UNBOUNDED, 3, 4);
        testHarness.processAll();
        Future<Boolean> checkpointFuture =
                testHarness.getStreamTask().triggerCheckpointAsync(metaData, barrier.getCheckpointOptions());
        processSingleStepUntil(testHarness, checkpointFuture::isDone);
        expectedOutput.add(new StreamRecord<>("3", TimestampAssigner.NO_TIMESTAMP));
        expectedOutput.add(new StreamRecord<>("47.0", TimestampAssigner.NO_TIMESTAMP));
        expectedOutput.add(new StreamRecord<>("44", TimestampAssigner.NO_TIMESTAMP));
        expectedOutput.add(new StreamRecord<>("1", TimestampAssigner.NO_TIMESTAMP));
        expectedOutput.add(new StreamRecord<>("4", TimestampAssigner.NO_TIMESTAMP));
        expectedOutput.add(new StreamRecord<>("2", TimestampAssigner.NO_TIMESTAMP));
        ArrayList<Object> actualOutput = new ArrayList<>(testHarness.getOutput());
        assertThat(actualOutput.subList(0, expectedOutput.size()), containsInAnyOrder(expectedOutput.toArray()));
        // The output must end with the FINISH marker, EndOfData and the savepoint barrier.
        assertThat(actualOutput.subList(actualOutput.size() - 3, actualOutput.size()), contains(new StreamRecord<>("FINISH"), new EndOfData(StopMode.DRAIN), barrier));
    }
}
Also used : MockSource(org.apache.flink.api.connector.source.mocks.MockSource) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) ArrayDeque(java.util.ArrayDeque) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) MapToStringMultipleInputOperatorFactory(org.apache.flink.streaming.runtime.tasks.MultipleInputStreamTaskTest.MapToStringMultipleInputOperatorFactory) Test(org.junit.Test)

Aggregations

SourceOperatorFactory (org.apache.flink.streaming.api.operators.SourceOperatorFactory) 12
Test (org.junit.Test) 8
ArrayList (java.util.ArrayList) 6
MockSource (org.apache.flink.api.connector.source.mocks.MockSource) 6
EndOfData (org.apache.flink.runtime.io.network.api.EndOfData) 4
OperatorID (org.apache.flink.runtime.jobgraph.OperatorID) 4
HashMap (java.util.HashMap) 3
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 3
TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot) 3
StreamTaskFinalCheckpointsTest.triggerCheckpoint (org.apache.flink.streaming.runtime.tasks.StreamTaskFinalCheckpointsTest.triggerCheckpoint) 3
IOException (java.io.IOException) 2
ArrayDeque (java.util.ArrayDeque) 2
List (java.util.List) 2
CompletableFuture (java.util.concurrent.CompletableFuture) 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 2
JobID (org.apache.flink.api.common.JobID) 2
Counter (org.apache.flink.metrics.Counter) 2
OperatorMetricGroup (org.apache.flink.metrics.groups.OperatorMetricGroup) 2
CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData) 2
CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics) 2