Search in sources :

Example 56 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.

From the class AbstractQueryableStateTestBase, method testDuplicateRegistrationFailsJob.

/**
 * Tests that duplicate query registrations fail the job at the JobManager.
 *
 * <p>Two keyed streams register queryable state under the same name
 * ("duplicate-me"). The job is expected to terminate with a failure whose
 * stringified cause names the duplicate KvState registration.
 */
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
    final int numKeys = 256;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
    // Reducing state
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
    final String queryName = "duplicate-me";
    // First queryable-state registration under queryName.
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = -4126824763829132959L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName, reducingState);
    // Second registration that reuses queryName — this duplicate is what
    // should make the job fail at the JobManager.
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = -6265024000462809436L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName);
    // Submit the job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    // Wait for the job result and assert that the deserialized failure cause
    // mentions the duplicate KvState name.
    clusterClient.submitJob(jobGraph).thenCompose(clusterClient::requestJobResult).thenApply(JobResult::getSerializedThrowable).thenAccept(serializedThrowable -> {
        assertTrue(serializedThrowable.isPresent());
        final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
        final String failureCause = ExceptionUtils.stringifyException(t);
        assertThat(failureCause, containsString("KvState with name '" + queryName + "' has already been registered by another operator"));
    }).get();
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) Arrays(java.util.Arrays) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ClassLoaderUtils(org.apache.flink.testutils.ClassLoaderUtils) ExceptionUtils(org.apache.flink.util.ExceptionUtils) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) StateBackend(org.apache.flink.runtime.state.StateBackend) URLClassLoader(java.net.URLClassLoader) AggregatingState(org.apache.flink.api.common.state.AggregatingState) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) ReducingState(org.apache.flink.api.common.state.ReducingState) QueryableStateStream(org.apache.flink.streaming.api.datastream.QueryableStateStream) Duration(java.time.Duration) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) ClassRule(org.junit.ClassRule) State(org.apache.flink.api.common.state.State) KeySelector(org.apache.flink.api.java.functions.KeySelector) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) CancellationException(java.util.concurrent.CancellationException) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Preconditions(org.apache.flink.util.Preconditions) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) TestingUtils(org.apache.flink.testutils.TestingUtils) VoidNamespaceSerializer(org.apache.flink.queryablestate.client.VoidNamespaceSerializer) 
List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) VoidNamespace(org.apache.flink.queryablestate.client.VoidNamespace) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) JobResult(org.apache.flink.runtime.jobmaster.JobResult) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Collector(org.apache.flink.util.Collector) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) AggregatingStateDescriptor(org.apache.flink.api.common.state.AggregatingStateDescriptor) Before(org.junit.Before) Serializer(com.esotericsoftware.kryo.Serializer) StateDescriptor(org.apache.flink.api.common.state.StateDescriptor) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) 
Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) UnknownKeyOrNamespaceException(org.apache.flink.queryablestate.exceptions.UnknownKeyOrNamespaceException) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) JobID(org.apache.flink.api.common.JobID) Ignore(org.junit.Ignore) MapState(org.apache.flink.api.common.state.MapState) Assert(org.junit.Assert) QueryableStateClient(org.apache.flink.queryablestate.client.QueryableStateClient) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) JobResult(org.apache.flink.runtime.jobmaster.JobResult) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 57 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.

From the class DataStreamAllroundTestProgram, method main.

/**
 * Entry point of the all-round DataStream test job.
 *
 * <p>Builds one pipeline that exercises: keyed state serialized with Kryo plus
 * a custom serializer, keyed state serialized with Avro, operator state, a
 * tumbling window (to cover timers state), a custom stateful type serializer,
 * optional failure simulation, and two semantics-check mappers whose output
 * goes to print sinks.
 */
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    setupEnvironment(env, pt);
    // add a keyed stateful map operator, which uses Kryo for state serialization
    DataStream<Event> eventStream = env.addSource(createEventSource(pt)).name(EVENT_SOURCE.getName()).uid(EVENT_SOURCE.getUid()).assignTimestampsAndWatermarks(createTimestampExtractor(pt)).keyBy(Event::getKey).map(createArtificialKeyedStateMapper(// map function simply forwards the inputs
    (MapFunction<Event, Event>) in -> in, // ComplexPayload state object
    (Event event, ComplexPayload lastState) -> {
        if (lastState != null && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName()) && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
            throwIncorrectRestoredStateException((event.getSequenceNumber() - 1), KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName(), lastState.getStrPayload());
        }
        return new ComplexPayload(event, KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName());
    }, Arrays.asList(new KryoSerializer<>(ComplexPayload.class, // KryoSerializer
    env.getConfig()), // custom
    new StatefulComplexPayloadSerializer()), // serializer
    Collections.singletonList(// KryoSerializer via type
    ComplexPayload.class))).returns(Event.class).name(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName()).uid(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getUid());
    // add a keyed stateful map operator, which uses Avro for state serialization
    eventStream = eventStream.keyBy(Event::getKey).map(createArtificialKeyedStateMapper(// map function simply forwards the inputs
    (MapFunction<Event, Event>) in -> in, // ComplexPayloadAvro state object
    (Event event, ComplexPayloadAvro lastState) -> {
        if (lastState != null && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_AVRO_SER.getName()) && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
            throwIncorrectRestoredStateException((event.getSequenceNumber() - 1), KEYED_STATE_OPER_WITH_AVRO_SER.getName(), lastState.getStrPayload());
        }
        ComplexPayloadAvro payload = new ComplexPayloadAvro();
        payload.setEventTime(event.getEventTime());
        payload.setInnerPayLoad(new InnerPayLoadAvro(event.getSequenceNumber()));
        payload.setStrPayload(KEYED_STATE_OPER_WITH_AVRO_SER.getName());
        payload.setStringList(Arrays.asList(String.valueOf(event.getKey()), event.getPayload()));
        return payload;
    }, Collections.singletonList(new AvroSerializer<>(ComplexPayloadAvro.class)), // custom AvroSerializer
    Collections.singletonList(// AvroSerializer via type
    ComplexPayloadAvro.class))).returns(Event.class).name(KEYED_STATE_OPER_WITH_AVRO_SER.getName()).uid(KEYED_STATE_OPER_WITH_AVRO_SER.getUid());
    // add an operator-state mapper (input is again forwarded unchanged)
    DataStream<Event> eventStream2 = eventStream.map(createArtificialOperatorStateMapper((MapFunction<Event, Event>) in -> in)).returns(Event.class).name(OPERATOR_STATE_OPER.getName()).uid(OPERATOR_STATE_OPER.getUid());
    // apply a tumbling window that simply passes forward window elements;
    // this allows the job to cover timers state
    @SuppressWarnings("Convert2Lambda") DataStream<Event> eventStream3 = applyTumblingWindows(eventStream2.keyBy(Event::getKey), pt).apply(new WindowFunction<Event, Event, Integer, TimeWindow>() {

        @Override
        public void apply(Integer integer, TimeWindow window, Iterable<Event> input, Collector<Event> out) {
            for (Event e : input) {
                out.collect(e);
            }
        }
    }).name(TIME_WINDOW_OPER.getName()).uid(TIME_WINDOW_OPER.getUid());
    // exercise a custom stateful type serializer on the stream
    eventStream3 = DataStreamAllroundTestJobFactory.verifyCustomStatefulTypeSerializer(eventStream3);
    // optionally inject an artificial failure mapper (parallelism 1) so
    // restore-from-checkpoint paths are exercised
    if (isSimulateFailures(pt)) {
        eventStream3 = eventStream3.map(createFailureMapper(pt)).setParallelism(1).name(FAILURE_MAPPER_NAME.getName()).uid(FAILURE_MAPPER_NAME.getUid());
    }
    // verify exactly-once/at-least-once semantics and print the verdicts
    eventStream3.keyBy(Event::getKey).flatMap(createSemanticsCheckMapper(pt)).name(SEMANTICS_CHECK_MAPPER.getName()).uid(SEMANTICS_CHECK_MAPPER.getUid()).addSink(new PrintSinkFunction<>()).name(SEMANTICS_CHECK_PRINT_SINK.getName()).uid(SEMANTICS_CHECK_PRINT_SINK.getUid());
    // Check sliding windows aggregations. Output all elements assigned to a window and later on
    // check if each event was emitted slide_factor number of times
    DataStream<Tuple2<Integer, List<Event>>> eventStream4 = eventStream2.keyBy(Event::getKey).window(createSlidingWindow(pt)).apply(new WindowFunction<Event, Tuple2<Integer, List<Event>>, Integer, TimeWindow>() {

        private static final long serialVersionUID = 3166250579972849440L;

        @Override
        public void apply(Integer key, TimeWindow window, Iterable<Event> input, Collector<Tuple2<Integer, List<Event>>> out) {
            out.collect(Tuple2.of(key, StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList())));
        }
    }).name(SLIDING_WINDOW_AGG.getName()).uid(SLIDING_WINDOW_AGG.getUid());
    eventStream4.keyBy(events -> events.f0).flatMap(createSlidingWindowCheckMapper(pt)).name(SLIDING_WINDOW_CHECK_MAPPER.getName()).uid(SLIDING_WINDOW_CHECK_MAPPER.getUid()).addSink(new PrintSinkFunction<>()).name(SLIDING_WINDOW_CHECK_PRINT_SINK.getName()).uid(SLIDING_WINDOW_CHECK_PRINT_SINK.getUid());
    env.execute("General purpose test job");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) StatefulComplexPayloadSerializer(org.apache.flink.streaming.tests.artificialstate.StatefulComplexPayloadSerializer) SEMANTICS_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_PRINT_SINK) Arrays(java.util.Arrays) DataStreamAllroundTestJobFactory.applyTumblingWindows(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.applyTumblingWindows) DataStreamAllroundTestJobFactory.createFailureMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createFailureMapper) DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PrintSinkFunction(org.apache.flink.streaming.api.functions.sink.PrintSinkFunction) KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER) ComplexPayloadAvro(org.apache.flink.streaming.tests.avro.ComplexPayloadAvro) AvroSerializer(org.apache.flink.formats.avro.typeutils.AvroSerializer) MapFunction(org.apache.flink.api.common.functions.MapFunction) DataStreamAllroundTestJobFactory.createSlidingWindow(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindow) DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) EVENT_SOURCE(org.apache.flink.streaming.tests.TestOperatorEnum.EVENT_SOURCE) DataStreamAllroundTestJobFactory.isSimulateFailures(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.isSimulateFailures) DataStreamAllroundTestJobFactory.setupEnvironment(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.setupEnvironment) Collector(org.apache.flink.util.Collector) 
TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) StreamSupport(java.util.stream.StreamSupport) OPERATOR_STATE_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.OPERATOR_STATE_OPER) SLIDING_WINDOW_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_MAPPER) KEYED_STATE_OPER_WITH_AVRO_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_AVRO_SER) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) FAILURE_MAPPER_NAME(org.apache.flink.streaming.tests.TestOperatorEnum.FAILURE_MAPPER_NAME) Collectors(java.util.stream.Collectors) DataStream(org.apache.flink.streaming.api.datastream.DataStream) WindowFunction(org.apache.flink.streaming.api.functions.windowing.WindowFunction) SLIDING_WINDOW_AGG(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_AGG) List(java.util.List) DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper) DataStreamAllroundTestJobFactory.createEventSource(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createEventSource) ValueState(org.apache.flink.api.common.state.ValueState) ComplexPayload(org.apache.flink.streaming.tests.artificialstate.ComplexPayload) SEMANTICS_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_MAPPER) SLIDING_WINDOW_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_PRINT_SINK) TIME_WINDOW_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.TIME_WINDOW_OPER) Collections(java.util.Collections) InnerPayLoadAvro(org.apache.flink.streaming.tests.avro.InnerPayLoadAvro) DataStreamAllroundTestJobFactory.createSemanticsCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSemanticsCheckMapper) 
DataStreamAllroundTestJobFactory.createTimestampExtractor(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createTimestampExtractor) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MapFunction(org.apache.flink.api.common.functions.MapFunction) InnerPayLoadAvro(org.apache.flink.streaming.tests.avro.InnerPayLoadAvro) StatefulComplexPayloadSerializer(org.apache.flink.streaming.tests.artificialstate.StatefulComplexPayloadSerializer) ComplexPayloadAvro(org.apache.flink.streaming.tests.avro.ComplexPayloadAvro) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) ComplexPayload(org.apache.flink.streaming.tests.artificialstate.ComplexPayload) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 58 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.

From the class CEPITCase, method testTimeoutHandling.

/**
 * Tests CEP timeout handling.
 *
 * <p>Applies the pattern {@code start -> middle -> end} with a 3 ms window on
 * an event-time stream; timed-out partial matches are emitted as
 * {@code Either.Left} (start price) and the complete match as
 * {@code Either.Right} ("start,middle,end" prices).
 */
@Test
public void testTimeoutHandling() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(envConfiguration);
    env.setParallelism(1);
    // (Event, timestamp)
    DataStream<Event> input = env.fromElements(Tuple2.of(new Event(1, "start", 1.0), 1L), Tuple2.of(new Event(1, "middle", 2.0), 5L), Tuple2.of(new Event(1, "start", 2.0), 4L), Tuple2.of(new Event(1, "end", 2.0), 6L)).assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {

        @Override
        public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
            return element.f1;
        }

        @Override
        public Watermark checkAndGetNextWatermark(Tuple2<Event, Long> lastElement, long extractedTimestamp) {
            // Watermark lags 5 behind each element's timestamp.
            return new Watermark(lastElement.f1 - 5);
        }
    }).map(new MapFunction<Tuple2<Event, Long>, Event>() {

        @Override
        public Event map(Tuple2<Event, Long> value) throws Exception {
            return value.f0;
        }
    });
    // Pattern: "start" followed by any "middle" followed by any "end",
    // all within 3 milliseconds.
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(new SimpleCondition<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("start");
        }
    }).followedByAny("middle").where(new SimpleCondition<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("middle");
        }
    }).followedByAny("end").where(new SimpleCondition<Event>() {

        @Override
        public boolean filter(Event value) throws Exception {
            return value.getName().equals("end");
        }
    }).within(Time.milliseconds(3));
    // Select with both a timeout function (Left: price of the timed-out
    // "start" event) and a select function (Right: joined prices).
    DataStream<Either<String, String>> result = CEP.pattern(input, pattern).select(new PatternTimeoutFunction<Event, String>() {

        @Override
        public String timeout(Map<String, List<Event>> pattern, long timeoutTimestamp) throws Exception {
            return pattern.get("start").get(0).getPrice() + "";
        }
    }, new PatternSelectFunction<Event, String>() {

        @Override
        public String select(Map<String, List<Event>> pattern) {
            StringBuilder builder = new StringBuilder();
            builder.append(pattern.get("start").get(0).getPrice()).append(",").append(pattern.get("middle").get(0).getPrice()).append(",").append(pattern.get("end").get(0).getPrice());
            return builder.toString();
        }
    });
    // Collect and sort (string order) for a deterministic comparison.
    List<Either<String, String>> resultList = new ArrayList<>();
    DataStreamUtils.collect(result).forEachRemaining(resultList::add);
    resultList.sort(Comparator.comparing(either -> either.toString()));
    // Three timeouts (Left) and one full match (Right) are expected.
    List<Either<String, String>> expected = Arrays.asList(Either.Left.of("1.0"), Either.Left.of("2.0"), Either.Left.of("2.0"), Either.Right.of("2.0,2.0,2.0"));
    assertEquals(expected, resultList);
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichIterativeCondition(org.apache.flink.cep.pattern.conditions.RichIterativeCondition) Either(org.apache.flink.types.Either) RunWith(org.junit.runner.RunWith) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) MapFunction(org.apache.flink.api.common.functions.MapFunction) ArrayList(java.util.ArrayList) NFA(org.apache.flink.cep.nfa.NFA) Collector(org.apache.flink.util.Collector) Duration(java.time.Duration) Map(java.util.Map) LongSerializer(org.apache.flink.api.common.typeutils.base.LongSerializer) Pattern(org.apache.flink.cep.pattern.Pattern) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Parameterized(org.junit.runners.Parameterized) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) Types(org.apache.flink.api.common.typeinfo.Types) Time(org.apache.flink.streaming.api.windowing.time.Time) KeySelector(org.apache.flink.api.java.functions.KeySelector) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) DataStreamUtils(org.apache.flink.streaming.api.datastream.DataStreamUtils) OutputTag(org.apache.flink.util.OutputTag) Test(org.junit.Test) DataStream(org.apache.flink.streaming.api.datastream.DataStream) List(java.util.List) AfterMatchSkipStrategy(org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy) CEPCacheOptions(org.apache.flink.cep.configuration.CEPCacheOptions) SimpleCondition(org.apache.flink.cep.pattern.conditions.SimpleCondition) Comparator(java.util.Comparator) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) SimpleCondition(org.apache.flink.cep.pattern.conditions.SimpleCondition) 
ArrayList(java.util.ArrayList) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) Either(org.apache.flink.types.Either) ArrayList(java.util.ArrayList) List(java.util.List) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 59 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.

From the class SocketWindowWordCount, method main.

/**
 * Reads text lines from a socket, splits them into words, and prints the
 * per-word counts over 5-second processing-time tumbling windows.
 */
public static void main(String[] args) throws Exception {
    // The host and the port of the text server to connect to.
    final String hostname;
    final int port;
    try {
        final ParameterTool parameters = ParameterTool.fromArgs(args);
        hostname = parameters.has("hostname") ? parameters.get("hostname") : "localhost";
        port = parameters.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " + "--hostname <hostname> --port <port>', where hostname (localhost by default) " + "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " + "type the input text into the command line");
        return;
    }

    // Obtain the streaming execution environment.
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Connect to the socket; each record is one line of text.
    final DataStream<String> lines = env.socketTextStream(hostname, port, "\n");

    // Tokenize each line into (word, 1) pairs.
    final DataStream<WordWithCount> words =
            lines.flatMap(
                    (FlatMapFunction<String, WordWithCount>) (value, out) -> {
                        for (String word : value.split("\\s")) {
                            out.collect(new WordWithCount(word, 1L));
                        }
                    },
                    Types.POJO(WordWithCount.class));

    // Group by word, window into 5-second tumbling windows, and sum counts.
    final DataStream<WordWithCount> windowCounts =
            words.keyBy(value -> value.word)
                    .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                    .reduce((a, b) -> new WordWithCount(a.word, a.count + b.count))
                    .returns(WordWithCount.class);

    // Print the results with a single thread, rather than in parallel.
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Types(org.apache.flink.api.common.typeinfo.Types) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) Time(org.apache.flink.streaming.api.windowing.time.Time) TumblingProcessingTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 60 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.

From the class GroupedProcessingTimeWindowExample, method main.

/**
 * Keys a generated (key, value) stream, aggregates it over 2.5-second sliding
 * processing-time windows (500 ms slide), and discards the results.
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Generated source of (key, value) tuples.
    final DataStream<Tuple2<Long, Long>> input = env.addSource(new DataSource());

    input.keyBy(record -> record.f0)
            .window(SlidingProcessingTimeWindows.of(Time.milliseconds(2500), Time.milliseconds(500)))
            .reduce(new SummingReducer())
            // Sink is only needed to terminate the pipeline; results are dropped.
            .addSink(new DiscardingSink<>());

    env.execute();
}
Also used : Tuple(org.apache.flink.api.java.tuple.Tuple) SlidingProcessingTimeWindows(org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows) Time(org.apache.flink.streaming.api.windowing.time.Time) KeySelector(org.apache.flink.api.java.functions.KeySelector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Window(org.apache.flink.streaming.api.windowing.windows.Window) DataStream(org.apache.flink.streaming.api.datastream.DataStream) WindowFunction(org.apache.flink.streaming.api.functions.windowing.WindowFunction) Collector(org.apache.flink.util.Collector) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Aggregations

DataStream (org.apache.flink.streaming.api.datastream.DataStream)87 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)78 Test (org.junit.Test)70 List (java.util.List)62 Collector (org.apache.flink.util.Collector)60 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)50 SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator)48 Arrays (java.util.Arrays)46 ArrayList (java.util.ArrayList)40 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)40 Assert.assertEquals (org.junit.Assert.assertEquals)38 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)36 Configuration (org.apache.flink.configuration.Configuration)36 Assert.assertTrue (org.junit.Assert.assertTrue)33 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)32 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)32 Types (org.apache.flink.api.common.typeinfo.Types)31 Assert (org.junit.Assert)31 ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction)29 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)29