
Example 76 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From the class StateDescriptorPassingTest, method testProcessAllWindowState.

@Test
public void testProcessAllWindowState() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);
    // simulate ingestion time
    DataStream<File> src = env.fromElements(new File("/"))
            .assignTimestampsAndWatermarks(
                    WatermarkStrategy.<File>forMonotonousTimestamps()
                            .withTimestampAssigner((file, ts) -> System.currentTimeMillis()));
    SingleOutputStreamOperator<?> result = src
            .windowAll(TumblingEventTimeWindows.of(Time.milliseconds(1000)))
            .process(new ProcessAllWindowFunction<File, String, TimeWindow>() {

                @Override
                public void process(Context ctx, Iterable<File> input, Collector<String> out) {}
            });
    validateListStateDescriptorConfigured(result);
}
Also used: Kryo (com.esotericsoftware.kryo.Kryo), Collector (org.apache.flink.util.Collector), TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow), ProcessAllWindowFunction (org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction), JavaSerializer (com.esotericsoftware.kryo.serializers.JavaSerializer), Time (org.apache.flink.streaming.api.windowing.time.Time), TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer), KeySelector (org.apache.flink.api.java.functions.KeySelector), StateDescriptor (org.apache.flink.api.common.state.StateDescriptor), KryoSerializer (org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer), SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator), WindowOperator (org.apache.flink.streaming.runtime.operators.windowing.WindowOperator), Assert.assertTrue (org.junit.Assert.assertTrue), WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy), Test (org.junit.Test), ProcessWindowFunction (org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction), OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation), File (java.io.File), DataStream (org.apache.flink.streaming.api.datastream.DataStream), WindowFunction (org.apache.flink.streaming.api.functions.windowing.WindowFunction), TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows), AllWindowFunction (org.apache.flink.streaming.api.functions.windowing.AllWindowFunction), ListSerializer (org.apache.flink.api.common.typeutils.base.ListSerializer), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
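
The helper validateListStateDescriptorConfigured is not reproduced on this page. Judging from the imports listed above (OneInputTransformation, WindowOperator, ListStateDescriptor, ListSerializer, KryoSerializer), a minimal sketch of the check it performs could look like the following; treat it as an illustration of the idea, not the exact Flink source.

// Sketch only: walk from the transformation to the WindowOperator and verify
// that window contents live in a ListStateDescriptor whose element serializer
// is the Kryo serializer registered for File.
private void validateListStateDescriptorConfigured(SingleOutputStreamOperator<?> result) {
    OneInputTransformation<?, ?> transform = (OneInputTransformation<?, ?>) result.getTransformation();
    WindowOperator<?, ?, ?, ?, ?> op = (WindowOperator<?, ?, ?, ?, ?>) transform.getOperator();
    StateDescriptor<?, ?> descr = op.getStateDescriptor();
    assertTrue(descr instanceof ListStateDescriptor);
    TypeSerializer<?> serializer = descr.getSerializer();
    assertTrue(serializer instanceof ListSerializer);
    TypeSerializer<?> elementSerializer = ((ListSerializer<?>) serializer).getElementSerializer();
    assertTrue(elementSerializer instanceof KryoSerializer);
}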

Example 77 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From the class SavepointITCase, method testSavepointForJobWithIteration.

@Test
public void testSavepointForJobWithIteration() throws Exception {
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        iterTestSnapshotWait[i] = new OneShotLatch();
        iterTestRestoreWait[i] = new OneShotLatch();
        iterTestCheckpointVerify[i] = 0;
    }
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final IntegerStreamSource source = new IntegerStreamSource();
    IterativeStream<Integer> iteration = env.addSource(source)
            .flatMap(new RichFlatMapFunction<Integer, Integer>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Integer in, Collector<Integer> clctr) throws Exception {
                    clctr.collect(in);
                }
            })
            .setParallelism(ITER_TEST_PARALLELISM)
            .keyBy(new KeySelector<Integer, Object>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Object getKey(Integer value) throws Exception {
                    return value;
                }
            })
            .flatMap(new DuplicateFilter())
            .setParallelism(ITER_TEST_PARALLELISM)
            .iterate();
    DataStream<Integer> iterationBody = iteration
            .map(new MapFunction<Integer, Integer>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Integer map(Integer value) throws Exception {
                    return value;
                }
            })
            .setParallelism(ITER_TEST_PARALLELISM);
    iteration.closeWith(iterationBody);
    StreamGraph streamGraph = env.getStreamGraph();
    JobGraph jobGraph = streamGraph.getJobGraph();
    Configuration config = getFileBasedCheckpointsConfig();
    config.addAll(jobGraph.getJobConfiguration());
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.ZERO);
    MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(config)
                    .setNumberTaskManagers(1)
                    .setNumberSlotsPerTaskManager(2 * jobGraph.getMaximumParallelism())
                    .build());
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    String savepointPath = null;
    try {
        client.submitJob(jobGraph).get();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobGraph.getJobID(), false);
        // block until every parallel instance signals that it has taken a snapshot
        for (OneShotLatch latch : iterTestSnapshotWait) {
            latch.await();
        }
        savepointPath = client.triggerSavepoint(jobGraph.getJobID(), null, SavepointFormatType.CANONICAL).get();
        client.cancel(jobGraph.getJobID()).get();
        while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
            Thread.sleep(100);
        }
        // rebuild the job graph and resubmit it, restoring from the savepoint
        jobGraph = streamGraph.getJobGraph();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        client.submitJob(jobGraph).get();
        // block until every parallel instance signals that its state was restored
        for (OneShotLatch latch : iterTestRestoreWait) {
            latch.await();
        }
        client.cancel(jobGraph.getJobID()).get();
        while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
            Thread.sleep(100);
        }
    } finally {
        if (null != savepointPath) {
            client.disposeSavepoint(savepointPath);
        }
        cluster.after();
    }
}
Also used: MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration), Configuration (org.apache.flink.configuration.Configuration), MiniClusterWithClientResource (org.apache.flink.test.util.MiniClusterWithClientResource), KeySelector (org.apache.flink.api.java.functions.KeySelector), RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction), MapFunction (org.apache.flink.api.common.functions.MapFunction), RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), Collector (org.apache.flink.util.Collector), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Test (org.junit.Test)
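
DuplicateFilter is a helper from SavepointITCase that is not shown above. A minimal sketch of such a de-duplicating filter, assuming keyed ValueState<Boolean> (imports org.apache.flink.api.common.state.ValueState and org.apache.flink.api.common.state.ValueStateDescriptor), could look like this; it is a stand-in, not necessarily the original implementation.

// Sketch only: forward each key the first time it is seen. The "seen" flag is
// keyed state, so it is included in checkpoints and savepoints and survives
// the restore that this test exercises.
private static class DuplicateFilter extends RichFlatMapFunction<Integer, Integer> {

    private static final long serialVersionUID = 1L;

    private transient ValueState<Boolean> seen;

    @Override
    public void open(Configuration parameters) {
        seen = getRuntimeContext().getState(new ValueStateDescriptor<>("seen", Boolean.class));
    }

    @Override
    public void flatMap(Integer value, Collector<Integer> out) throws Exception {
        if (seen.value() == null) {
            seen.update(true);
            out.collect(value);
        }
    }
}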

Example 78 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From the class GroupCombineITCase, method testCheckPartitionShuffleDOP1.

// check whether a parallelism of 1 produces the same data as a shuffle
@Test
public void testCheckPartitionShuffleDOP1() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    // partition and group data
    UnsortedGrouping<Tuple3<Integer, Long, String>> partitionedDS = ds.partitionByHash(0).groupBy(1);
    List<Tuple2<Long, Integer>> result = partitionedDS
            .combineGroup(new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() {

                @Override
                public void combine(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple2<Long, Integer>> out) throws Exception {
                    // count the elements per group; every element carries the same key
                    int count = 0;
                    long key = 0;
                    for (Tuple3<Integer, Long, String> value : values) {
                        key = value.f1;
                        count++;
                    }
                    out.collect(new Tuple2<>(key, count));
                }
            })
            .collect();
    String expected = "6,6\n" + "5,5\n" + "4,4\n" + "3,3\n" + "2,2\n" + "1,1\n";
    compareResultAsTuples(result, expected);
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), GroupCombineFunction (org.apache.flink.api.common.functions.GroupCombineFunction), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Collector (org.apache.flink.util.Collector), Test (org.junit.Test)
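
CollectionDataSets.get3TupleDataSet is a shared test fixture that is not shown here. The expected result implies that the grouping field f1 takes the values 1 through 6, with the value n occurring exactly n times. A self-contained stand-in with the same key distribution (invented data, not the real fixture) would be:

// Stand-in for get3TupleDataSet: f1 == n appears n times, so grouping on f1
// and counting yields (n, n) for n = 1..6.
DataSet<Tuple3<Integer, Long, String>> ds = env.fromElements(
        Tuple3.of(1, 1L, "a"),
        Tuple3.of(2, 2L, "b"), Tuple3.of(3, 2L, "c"),
        Tuple3.of(4, 3L, "d"), Tuple3.of(5, 3L, "e"), Tuple3.of(6, 3L, "f"),
        // ... four tuples with f1 == 4L, five with f1 == 5L ...
        Tuple3.of(16, 6L, "p"), Tuple3.of(17, 6L, "q"), Tuple3.of(18, 6L, "r"),
        Tuple3.of(19, 6L, "s"), Tuple3.of(20, 6L, "t"), Tuple3.of(21, 6L, "u"));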

Example 79 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From the class MiscellaneousIssuesITCase, method testAccumulatorsAfterNoOp.

@Test
public void testAccumulatorsAfterNoOp() {
    final String accName = "test_accumulator";
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);
        env.generateSequence(1, 1000000).rebalance().flatMap(new RichFlatMapFunction<Long, Long>() {

            private LongCounter counter;

            @Override
            public void open(Configuration parameters) {
                counter = getRuntimeContext().getLongCounter(accName);
            }

            @Override
            public void flatMap(Long value, Collector<Long> out) {
                // count each record; nothing is emitted downstream
                counter.add(1L);
            }
        }).output(new DiscardingOutputFormat<Long>());
        JobExecutionResult result = env.execute();
        assertEquals(1000000L, result.getAllAccumulatorResults().get(accName));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: JobExecutionResult (org.apache.flink.api.common.JobExecutionResult), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Configuration (org.apache.flink.configuration.Configuration), MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration), RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction), Collector (org.apache.flink.util.Collector), LongCounter (org.apache.flink.api.common.accumulators.LongCounter), JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), Test (org.junit.Test)
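
getAllAccumulatorResults() returns an untyped map, so the lookup above compares against a plain Object. JobExecutionResult also offers a typed accessor, and the assertion could equivalently be written as:

// Typed alternative to the map lookup above.
long count = result.<Long>getAccumulatorResult(accName);
assertEquals(1000000L, count);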

Example 80 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From the class SideOutputITCase, method testAllWindowLateArrivingEvents.

/**
 * Tests that late-arriving elements of an all-window operator are emitted via a side output.
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    DataStream<Integer> dataStream = see.fromCollection(elements);
    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {
    };
    SingleOutputStreamOperator<Integer> windowOperator = dataStream
            .assignTimestampsAndWatermarks(new TestWatermarkAssigner())
            .windowAll(SlidingEventTimeWindows.of(Time.milliseconds(1), Time.milliseconds(1)))
            .sideOutputLateData(lateDataTag)
            .apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
                    for (Integer val : values) {
                        out.collect(val);
                    }
                }
            });
    windowOperator.getSideOutput(lateDataTag)
            .flatMap(new FlatMapFunction<Integer, String>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Integer value, Collector<String> out) throws Exception {
                    out.collect("late-" + value);
                }
            })
            .addSink(sideOutputResultSink);
    see.execute();
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
Also used: TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow), ExpectedException (org.junit.rules.ExpectedException), TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), Collector (org.apache.flink.util.Collector), OutputTag (org.apache.flink.util.OutputTag), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Test (org.junit.Test)
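
Neither elements nor TestWatermarkAssigner is shown on this page. For the assertion to hold, the values 3 and 4 must arrive after the watermark has already passed their timestamps, e.g. with an input order such as 1, 2, 5, 3, 4 (an assumption here, not taken from the source). A stand-in assigner along those lines (imports org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks and org.apache.flink.streaming.api.watermark.Watermark), not the original helper, could be:

// Stand-in only: assign timestamp == value and advance the watermark to that
// timestamp on every element. Once 5 has pushed the watermark to 5, the
// later-arriving 3 and 4 are late for their windows and go to the side output.
private static class TestWatermarkAssigner implements AssignerWithPunctuatedWatermarks<Integer> {

    private static final long serialVersionUID = 1L;

    @Override
    public long extractTimestamp(Integer element, long previousElementTimestamp) {
        return element.longValue();
    }

    @Override
    public Watermark checkAndGetNextWatermark(Integer lastElement, long extractedTimestamp) {
        return new Watermark(extractedTimestamp);
    }
}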

Aggregations

Collector (org.apache.flink.util.Collector): 80
Test (org.junit.Test): 60
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 33
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 32
Configuration (org.apache.flink.configuration.Configuration): 27
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 19
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 18
ArrayList (java.util.ArrayList): 16
DataStream (org.apache.flink.streaming.api.datastream.DataStream): 16
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 16
HashMap (java.util.HashMap): 14
List (java.util.List): 14
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 12
IOException (java.io.IOException): 11
Arrays (java.util.Arrays): 11
Map (java.util.Map): 11
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 11
Assert.assertTrue (org.junit.Assert.assertTrue): 11
InternalWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalWindowFunction): 10