Search in sources :

Example 1 with NumberSequenceSource

use of org.apache.flink.api.connector.source.lib.NumberSequenceSource in project flink by apache.

the class JobCancelingITCase method testCancelingWhileBackPressured.

/**
 * Submits a job that joins a FLIP-27 source and a legacy source function into an
 * {@code ExplodingFlatMapFunction} followed by a {@code SleepingSink}, cancels it shortly after
 * all tasks are running, and expects the job to end up in {@link JobStatus#CANCELED}.
 *
 * <p>The task cancellation timeout and interval are both raised to one day so that the
 * cancellation has to succeed without interrupts or the watchdog stepping in.
 *
 * @throws Exception if submitting, canceling or querying the job fails, or if the test thread
 *     is interrupted while waiting
 */
@Test
public void testCancelingWhileBackPressured() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().enableObjectReuse();
    // Basically disable interrupts and JVM killer watchdogs
    env.getConfig().setTaskCancellationTimeout(Duration.ofDays(1).toMillis());
    env.getConfig().setTaskCancellationInterval(Duration.ofDays(1).toMillis());
    // Check both FLIP-27 and normal sources
    final DataStreamSource<Long> source1 = env.fromSource(new NumberSequenceSource(1L, Long.MAX_VALUE), WatermarkStrategy.noWatermarks(), "source-1");
    // otherwise split enumerator will generate splits that can start emitting from very large
    // numbers, that do not work well with ExplodingFlatMapFunction
    source1.setParallelism(1);
    final DataStream<Long> source2 = env.addSource(new InfiniteLongSourceFunction());
    source1.connect(source2).flatMap(new ExplodingFlatMapFunction()).startNewChain().addSink(new SleepingSink());
    StreamGraph streamGraph = env.getStreamGraph();
    JobGraph jobGraph = streamGraph.getJobGraph();
    ClusterClient<?> client = MINI_CLUSTER.getClusterClient();
    JobID jobID = client.submitJob(jobGraph).get();
    waitForAllTaskRunning(MINI_CLUSTER.getMiniCluster(), jobID, false);
    // give back pressure a bit of time to build up
    Thread.sleep(100);
    client.cancel(jobID).get();
    // Poll until the job reaches a terminal state. Pause briefly between requests so the
    // test thread does not spin and flood the cluster client with status queries.
    while (!client.getJobStatus(jobID).get().isTerminalState()) {
        Thread.sleep(10);
    }
    assertEquals(JobStatus.CANCELED, client.getJobStatus(jobID).get());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with NumberSequenceSource

use of org.apache.flink.api.connector.source.lib.NumberSequenceSource in project flink by apache.

the class SourceNAryInputChainingITCase method createProgramWithMultipleUnionInputs.

/**
 * Builds a DataStream program in which six number-sequence sources feed one N-ary operator
 * through four inputs, two of which are unions. The topology is sketched below.
 *
 * <pre>
 *                                   +--------------+
 *             (src 1) --> (map) --> |              |
 *                                   |              |
 *           (src 2) --+             |              |
 *                     +-- UNION --> |              |
 *           (src 3) --+             |    N-Ary     |
 *                                   |   Operator   |
 *   (src 4) -> (map) --+            |              |
 *                      +-- UNION -> |              |
 *   (src 5) -> (map) --+            |              |
 *                                   |              |
 *                       (src 6) --> |              |
 *                                   +--------------+
 * </pre>
 */
private DataStream<Long> createProgramWithMultipleUnionInputs() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().enableObjectReuse();

    // Six bounded sources, each covering its own block of ten consecutive numbers.
    final DataStream<Long> first = env.fromSource(new NumberSequenceSource(1L, 10L), WatermarkStrategy.noWatermarks(), "source-1");
    final DataStream<Long> second = env.fromSource(new NumberSequenceSource(11L, 20L), WatermarkStrategy.noWatermarks(), "source-2");
    final DataStream<Long> third = env.fromSource(new NumberSequenceSource(21L, 30L), WatermarkStrategy.noWatermarks(), "source-3");
    final DataStream<Long> fourth = env.fromSource(new NumberSequenceSource(31L, 40L), WatermarkStrategy.noWatermarks(), "source-4");
    final DataStream<Long> fifth = env.fromSource(new NumberSequenceSource(41L, 50L), WatermarkStrategy.noWatermarks(), "source-5");
    final DataStream<Long> sixth = env.fromSource(new NumberSequenceSource(51L, 60L), WatermarkStrategy.noWatermarks(), "source-6");

    // Input 1: a single source behind an identity map.
    final DataStream<Long> mappedFirst = first.map(v -> v);
    // Input 2: plain union of two sources.
    final DataStream<Long> secondAndThird = second.union(third);
    // Input 3: union of two sources that each pass through an identity map.
    final DataStream<Long> mappedFourth = fourth.map(v -> v);
    final DataStream<Long> mappedFifth = fifth.map(v -> v);
    final DataStream<Long> fourthAndFifth = mappedFourth.union(mappedFifth);

    // Input 4 is the sixth source, wired in directly.
    return nAryInputStreamOperation(mappedFirst, secondAndThird, fourthAndFifth, sixth);
}
Also used : MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) ArrayList(java.util.ArrayList) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) ClassRule(org.junit.ClassRule) Types(org.apache.flink.api.common.typeinfo.Types) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) DataStreamUtils(org.apache.flink.streaming.api.datastream.DataStreamUtils) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) StreamingJobGraphGenerator(org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) List(java.util.List) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) 
StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource)

Example 3 with NumberSequenceSource

use of org.apache.flink.api.connector.source.lib.NumberSequenceSource in project flink by apache.

the class SourceNAryInputChainingITCase method createProgramWithMixedInputs.

/**
 * Builds a DataStream program in which three number-sequence sources feed one N-ary operator;
 * the first and third go through a map, the second is connected directly.
 *
 * <pre>
 *                         +--------------+
 *   (src 1) --> (map) --> |              |
 *                         |     N-Ary    |
 *             (src 2) --> |              |
 *                         |   Operator   |
 *   (src 3) --> (map) --> |              |
 *                         +--------------+
 * </pre>
 */
private DataStream<Long> createProgramWithMixedInputs() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().enableObjectReuse();

    final DataStream<Long> first = env.fromSource(new NumberSequenceSource(1L, 10L), WatermarkStrategy.noWatermarks(), "source-1");
    final DataStream<Long> second = env.fromSource(new NumberSequenceSource(11L, 20L), WatermarkStrategy.noWatermarks(), "source-2");
    final DataStream<Long> third = env.fromSource(new NumberSequenceSource(21L, 30L), WatermarkStrategy.noWatermarks(), "source-3");

    // Identity maps wrap the outer two inputs; the middle input stays a bare source.
    return nAryInputStreamOperation(first.map(v -> v), second, third.map(v -> v));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource)

Example 4 with NumberSequenceSource

use of org.apache.flink.api.connector.source.lib.NumberSequenceSource in project flink by apache.

the class StreamingJobGraphGeneratorTest method testYieldingOperatorProperlyChainedOnNewSources.

/**
 * Checks that a {@link org.apache.flink.streaming.api.operators.YieldingOperatorFactory} is
 * chained to a new-style source, see FLINK-20444.
 */
@Test
public void testYieldingOperatorProperlyChainedOnNewSources() {
    final StreamExecutionEnvironment chainEnv = StreamExecutionEnvironment.createLocalEnvironment(1);

    final DataStream<Long> mapped =
            chainEnv.fromSource(new NumberSequenceSource(0, 10), WatermarkStrategy.noWatermarks(), "input")
                    .map(x -> x);
    mapped.transform("test", BasicTypeInfo.LONG_TYPE_INFO, new YieldingTestOperatorFactory<>())
            .addSink(new DiscardingSink<>());

    final List<JobVertex> sortedVertices =
            chainEnv.getStreamGraph().getJobGraph().getVerticesSortedTopologicallyFromSources();

    // The whole pipeline is expected to end up in one job vertex carrying four operator IDs.
    assertEquals(1, sortedVertices.size());
    final JobVertex onlyVertex = sortedVertices.get(0);
    assertEquals(4, onlyVertex.getOperatorIDs().size());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) Test(org.junit.Test)

Example 5 with NumberSequenceSource

use of org.apache.flink.api.connector.source.lib.NumberSequenceSource in project flink by apache.

the class StreamingJobGraphGeneratorTest method createJobGraphWithDescription.

/**
 * Builds a job graph whose operators carry descriptions.
 *
 * <p>With exactly one input name, the source is a three-element stream described by that name.
 * Otherwise the source is a {@link MultipleInputTransformation} that gets one
 * {@link NumberSequenceSource} input per name, each described by its name. In both cases two
 * maps are applied to the source and each map is printed twice.
 *
 * @param env the environment to build the program in; its parallelism is set to 1
 * @param inputNames names (and descriptions) of the inputs
 * @return the job graph generated from the environment's stream graph
 */
private JobGraph createJobGraphWithDescription(StreamExecutionEnvironment env, String... inputNames) {
    env.setParallelism(1);

    final DataStream<Long> source;
    if (inputNames.length != 1) {
        final MultipleInputTransformation<Long> multiInput = new MultipleInputTransformation<>("mit", new UnusedOperatorFactory(), Types.LONG, env.getParallelism());
        multiInput.setDescription("operator chained with source");
        multiInput.setChainingStrategy(ChainingStrategy.HEAD_WITH_SOURCES);
        // Register one described number-sequence source per requested input name.
        for (String inputName : inputNames) {
            multiInput.addInput(
                    env.fromSource(new NumberSequenceSource(1, 2), WatermarkStrategy.noWatermarks(), inputName)
                            .setDescription(inputName)
                            .getTransformation());
        }
        source = new DataStream<>(env, multiInput);
    } else {
        source = env.fromElements(1L, 2L, 3L).setDescription(inputNames[0]);
    }

    final DataStream<Long> plusOne = source.map(x -> x + 1).setDescription("x + 1");
    final DataStream<Long> plusTwo = source.map(x -> x + 2).setDescription("x + 2");

    // Each map output gets two separately described print sinks.
    plusOne.print().setDescription("first print of map1");
    plusOne.print().setDescription("second print of map1");
    plusTwo.print().setDescription("first print of map2");
    plusTwo.print().setDescription("second print of map2");

    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TypeSerializerInputFormat(org.apache.flink.api.java.io.TypeSerializerInputFormat) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) UserCodeWrapper(org.apache.flink.api.common.operators.util.UserCodeWrapper) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) Set(java.util.Set) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Assert.assertFalse(org.junit.Assert.assertFalse) StreamingJobGraphGenerator.areOperatorsChainable(org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.areOperatorsChainable) Boundedness(org.apache.flink.api.connector.source.Boundedness) OneInputStreamOperatorFactory(org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) Collector(org.apache.flink.util.Collector) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Types(org.apache.flink.api.common.typeinfo.Types) 
DataStreamSink(org.apache.flink.streaming.api.datastream.DataStreamSink) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) MailboxExecutor(org.apache.flink.api.common.operators.MailboxExecutor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Assert.assertNull(org.junit.Assert.assertNull) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) Assert(org.junit.Assert) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Assert.assertEquals(org.junit.Assert.assertEquals) CoreMatchers.is(org.hamcrest.CoreMatchers.is) PipelineOptions(org.apache.flink.configuration.PipelineOptions) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) TestLogger(org.apache.flink.util.TestLogger) InputFormat(org.apache.flink.api.common.io.InputFormat) Assert.fail(org.junit.Assert.fail) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Method(java.lang.reflect.Method) OutputFormat(org.apache.flink.api.common.io.OutputFormat) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) Collectors(java.util.stream.Collectors) 
ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) SimpleOperatorFactory(org.apache.flink.streaming.api.operators.SimpleOperatorFactory) List(java.util.List) MultipleInputStreamTask(org.apache.flink.streaming.runtime.tasks.MultipleInputStreamTask) SerializedValue(org.apache.flink.util.SerializedValue) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) InputOutputFormatVertex(org.apache.flink.runtime.jobgraph.InputOutputFormatVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) JobType(org.apache.flink.runtime.jobgraph.JobType) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) MockSource(org.apache.flink.api.connector.source.mocks.MockSource) SourceOperatorStreamTask(org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask) StreamMap(org.apache.flink.streaming.api.operators.StreamMap) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) 
CoordinatedOperatorFactory(org.apache.flink.streaming.api.operators.CoordinatedOperatorFactory) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) InputFormatSourceFunction(org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction) DataStream(org.apache.flink.streaming.api.datastream.DataStream) RescalePartitioner(org.apache.flink.streaming.runtime.partitioner.RescalePartitioner) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) TestAnyModeReadingStreamOperator(org.apache.flink.streaming.util.TestAnyModeReadingStreamOperator) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) InputOutputFormatContainer(org.apache.flink.runtime.jobgraph.InputOutputFormatContainer) Comparator(java.util.Comparator) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Collections(java.util.Collections) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation)

Aggregations

NumberSequenceSource (org.apache.flink.api.connector.source.lib.NumberSequenceSource)9 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)9 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)6 Test (org.junit.Test)6 ArrayList (java.util.ArrayList)4 List (java.util.List)4 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)4 Types (org.apache.flink.api.common.typeinfo.Types)4 DataStream (org.apache.flink.streaming.api.datastream.DataStream)4 DiscardingSink (org.apache.flink.streaming.api.functions.sink.DiscardingSink)4 AbstractStreamOperatorFactory (org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory)4 ChainingStrategy (org.apache.flink.streaming.api.operators.ChainingStrategy)4 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)4 StreamOperatorParameters (org.apache.flink.streaming.api.operators.StreamOperatorParameters)4 MultipleInputTransformation (org.apache.flink.streaming.api.transformations.MultipleInputTransformation)4 TestLogger (org.apache.flink.util.TestLogger)4 Assert.assertEquals (org.junit.Assert.assertEquals)4 Assert.fail (org.junit.Assert.fail)4 Method (java.lang.reflect.Method)2 Arrays (java.util.Arrays)2