Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class AbstractQueryableStateTestBase, method testDuplicateRegistrationFailsJob.
/**
* Tests that duplicate query registrations fail the job at the JobManager.
*/
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
final int numKeys = 256;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(stateBackend);
env.setParallelism(maxParallelism);
// Very important, because cluster is shared between tests and we
// don't explicitly check that all slots are available before
// submitting.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
// Reducing state
ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
final String queryName = "duplicate-me";
final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
        source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
            private static final long serialVersionUID = -4126824763829132959L;

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) {
                return value.f0;
            }
        }).asQueryableState(queryName, reducingState);
final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate =
        source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
            private static final long serialVersionUID = -6265024000462809436L;

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) {
                return value.f0;
            }
        }).asQueryableState(queryName);
// Submit the job graph
final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
clusterClient.submitJob(jobGraph)
        .thenCompose(clusterClient::requestJobResult)
        .thenApply(JobResult::getSerializedThrowable)
        .thenAccept(serializedThrowable -> {
            assertTrue(serializedThrowable.isPresent());
            final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
            final String failureCause = ExceptionUtils.stringifyException(t);
            assertThat(failureCause, containsString(
                    "KvState with name '" + queryName + "' has already been registered by another operator"));
        })
        .get();
}
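Had the state been registered only once (in this test the duplicate registration deliberately fails the job), an external process could look it up under the name passed to asQueryableState using Flink's QueryableStateClient. A minimal sketch, assuming a queryable-state proxy reachable at proxyHost:9069, an existing JobID named jobId, the flink-queryable-state-client-java dependency, and an enclosing method that declares throws Exception:
// Sketch only: host, port and jobId are placeholder assumptions, not part of the test above.
QueryableStateClient client = new QueryableStateClient("proxyHost", 9069);
// Same state name, reduce function and value type as registered by the job.
ReducingStateDescriptor<Tuple2<Integer, Long>> descriptor =
        new ReducingStateDescriptor<>("any-name", new SumReduce(),
                TypeInformation.of(new TypeHint<Tuple2<Integer, Long>>() {}));
// Ask the proxy for the state of key 42 under the queryable name "duplicate-me".
CompletableFuture<ReducingState<Tuple2<Integer, Long>>> resultFuture =
        client.getKvState(jobId, "duplicate-me", 42, BasicTypeInfo.INT_TYPE_INFO, descriptor);
resultFuture.thenAccept(state -> {
    try {
        System.out.println("Sum for key 42: " + state.get());
    } catch (Exception e) {
        e.printStackTrace();
    }
});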
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamAllroundTestProgram, method main.
public static void main(String[] args) throws Exception {
final ParameterTool pt = ParameterTool.fromArgs(args);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
setupEnvironment(env, pt);
// add a keyed stateful map operator, which uses Kryo for state serialization
DataStream<Event> eventStream = env.addSource(createEventSource(pt))
        .name(EVENT_SOURCE.getName())
        .uid(EVENT_SOURCE.getUid())
        .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
        .keyBy(Event::getKey)
        .map(createArtificialKeyedStateMapper(
                // map function simply forwards the inputs
                (MapFunction<Event, Event>) in -> in,
                // ComplexPayload state object
                (Event event, ComplexPayload lastState) -> {
                    if (lastState != null
                            && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
                            && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                        throwIncorrectRestoredStateException(
                                (event.getSequenceNumber() - 1),
                                KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName(),
                                lastState.getStrPayload());
                    }
                    return new ComplexPayload(event, KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName());
                },
                Arrays.asList(
                        new KryoSerializer<>(ComplexPayload.class, env.getConfig()), // KryoSerializer
                        new StatefulComplexPayloadSerializer()), // custom serializer
                Collections.singletonList(ComplexPayload.class))) // KryoSerializer via type
        .returns(Event.class)
        .name(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
        .uid(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getUid());
// add a keyed stateful map operator, which uses Avro for state serialization
eventStream = eventStream.keyBy(Event::getKey)
        .map(createArtificialKeyedStateMapper(
                // map function simply forwards the inputs
                (MapFunction<Event, Event>) in -> in,
                // ComplexPayloadAvro state object
                (Event event, ComplexPayloadAvro lastState) -> {
                    if (lastState != null
                            && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
                            && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                        throwIncorrectRestoredStateException(
                                (event.getSequenceNumber() - 1),
                                KEYED_STATE_OPER_WITH_AVRO_SER.getName(),
                                lastState.getStrPayload());
                    }
                    ComplexPayloadAvro payload = new ComplexPayloadAvro();
                    payload.setEventTime(event.getEventTime());
                    payload.setInnerPayLoad(new InnerPayLoadAvro(event.getSequenceNumber()));
                    payload.setStrPayload(KEYED_STATE_OPER_WITH_AVRO_SER.getName());
                    payload.setStringList(Arrays.asList(String.valueOf(event.getKey()), event.getPayload()));
                    return payload;
                },
                Collections.singletonList(new AvroSerializer<>(ComplexPayloadAvro.class)), // custom AvroSerializer
                Collections.singletonList(ComplexPayloadAvro.class))) // AvroSerializer via type
        .returns(Event.class)
        .name(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
        .uid(KEYED_STATE_OPER_WITH_AVRO_SER.getUid());
DataStream<Event> eventStream2 = eventStream
        .map(createArtificialOperatorStateMapper((MapFunction<Event, Event>) in -> in))
        .returns(Event.class)
        .name(OPERATOR_STATE_OPER.getName())
        .uid(OPERATOR_STATE_OPER.getUid());
// apply a tumbling window that simply passes forward window elements;
// this allows the job to cover timers state
@SuppressWarnings("Convert2Lambda")
DataStream<Event> eventStream3 = applyTumblingWindows(eventStream2.keyBy(Event::getKey), pt)
        .apply(new WindowFunction<Event, Event, Integer, TimeWindow>() {
            @Override
            public void apply(Integer integer, TimeWindow window, Iterable<Event> input, Collector<Event> out) {
                for (Event e : input) {
                    out.collect(e);
                }
            }
        })
        .name(TIME_WINDOW_OPER.getName())
        .uid(TIME_WINDOW_OPER.getUid());
eventStream3 = DataStreamAllroundTestJobFactory.verifyCustomStatefulTypeSerializer(eventStream3);
if (isSimulateFailures(pt)) {
    eventStream3 = eventStream3.map(createFailureMapper(pt))
            .setParallelism(1)
            .name(FAILURE_MAPPER_NAME.getName())
            .uid(FAILURE_MAPPER_NAME.getUid());
}
eventStream3.keyBy(Event::getKey)
        .flatMap(createSemanticsCheckMapper(pt))
        .name(SEMANTICS_CHECK_MAPPER.getName())
        .uid(SEMANTICS_CHECK_MAPPER.getUid())
        .addSink(new PrintSinkFunction<>())
        .name(SEMANTICS_CHECK_PRINT_SINK.getName())
        .uid(SEMANTICS_CHECK_PRINT_SINK.getUid());
// Check sliding window aggregations: output all elements assigned to a window, and later
// verify that each event was emitted slide_factor times.
DataStream<Tuple2<Integer, List<Event>>> eventStream4 = eventStream2.keyBy(Event::getKey)
        .window(createSlidingWindow(pt))
        .apply(new WindowFunction<Event, Tuple2<Integer, List<Event>>, Integer, TimeWindow>() {
            private static final long serialVersionUID = 3166250579972849440L;

            @Override
            public void apply(Integer key, TimeWindow window, Iterable<Event> input, Collector<Tuple2<Integer, List<Event>>> out) {
                out.collect(Tuple2.of(key, StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList())));
            }
        })
        .name(SLIDING_WINDOW_AGG.getName())
        .uid(SLIDING_WINDOW_AGG.getUid());
eventStream4.keyBy(events -> events.f0)
        .flatMap(createSlidingWindowCheckMapper(pt))
        .name(SLIDING_WINDOW_CHECK_MAPPER.getName())
        .uid(SLIDING_WINDOW_CHECK_MAPPER.getUid())
        .addSink(new PrintSinkFunction<>())
        .name(SLIDING_WINDOW_CHECK_PRINT_SINK.getName())
        .uid(SLIDING_WINDOW_CHECK_PRINT_SINK.getUid());
env.execute("General purpose test job");
}
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class CEPITCase, method testTimeoutHandling.
@Test
public void testTimeoutHandling() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(envConfiguration);
env.setParallelism(1);
// (Event, timestamp)
DataStream<Event> input = env.fromElements(
                Tuple2.of(new Event(1, "start", 1.0), 1L),
                Tuple2.of(new Event(1, "middle", 2.0), 5L),
                Tuple2.of(new Event(1, "start", 2.0), 4L),
                Tuple2.of(new Event(1, "end", 2.0), 6L))
        .assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Tuple2<Event, Long>>() {
            @Override
            public long extractTimestamp(Tuple2<Event, Long> element, long currentTimestamp) {
                return element.f1;
            }

            @Override
            public Watermark checkAndGetNextWatermark(Tuple2<Event, Long> lastElement, long extractedTimestamp) {
                return new Watermark(lastElement.f1 - 5);
            }
        })
        .map(new MapFunction<Tuple2<Event, Long>, Event>() {
            @Override
            public Event map(Tuple2<Event, Long> value) throws Exception {
                return value.f0;
            }
        });
Pattern<Event, ?> pattern = Pattern.<Event>begin("start")
        .where(new SimpleCondition<Event>() {
            @Override
            public boolean filter(Event value) throws Exception {
                return value.getName().equals("start");
            }
        })
        .followedByAny("middle")
        .where(new SimpleCondition<Event>() {
            @Override
            public boolean filter(Event value) throws Exception {
                return value.getName().equals("middle");
            }
        })
        .followedByAny("end")
        .where(new SimpleCondition<Event>() {
            @Override
            public boolean filter(Event value) throws Exception {
                return value.getName().equals("end");
            }
        })
        .within(Time.milliseconds(3));
DataStream<Either<String, String>> result = CEP.pattern(input, pattern).select(
        new PatternTimeoutFunction<Event, String>() {
            @Override
            public String timeout(Map<String, List<Event>> pattern, long timeoutTimestamp) throws Exception {
                return pattern.get("start").get(0).getPrice() + "";
            }
        },
        new PatternSelectFunction<Event, String>() {
            @Override
            public String select(Map<String, List<Event>> pattern) {
                StringBuilder builder = new StringBuilder();
                builder.append(pattern.get("start").get(0).getPrice())
                        .append(",")
                        .append(pattern.get("middle").get(0).getPrice())
                        .append(",")
                        .append(pattern.get("end").get(0).getPrice());
                return builder.toString();
            }
        });
List<Either<String, String>> resultList = new ArrayList<>();
DataStreamUtils.collect(result).forEachRemaining(resultList::add);
resultList.sort(Comparator.comparing(either -> either.toString()));
List<Either<String, String>> expected = Arrays.asList(Either.Left.of("1.0"), Either.Left.of("2.0"), Either.Left.of("2.0"), Either.Right.of("2.0,2.0,2.0"));
assertEquals(expected, resultList);
}
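On recent Flink versions, timed-out partial matches can also be routed to a side output instead of being wrapped in an Either. A rough sketch against the same input and pattern as above (the OutputTag name is arbitrary and this variant is not part of the test):
// Hypothetical alternative: emit timed-out partial matches to a side output.
OutputTag<String> timedOutTag = new OutputTag<String>("timed-out"){};

SingleOutputStreamOperator<String> matches = CEP.pattern(input, pattern).select(
        timedOutTag,
        new PatternTimeoutFunction<Event, String>() {
            @Override
            public String timeout(Map<String, List<Event>> pattern, long timeoutTimestamp) {
                return pattern.get("start").get(0).getPrice() + "";
            }
        },
        new PatternSelectFunction<Event, String>() {
            @Override
            public String select(Map<String, List<Event>> pattern) {
                return pattern.get("start").get(0).getPrice() + ","
                        + pattern.get("middle").get(0).getPrice() + ","
                        + pattern.get("end").get(0).getPrice();
            }
        });

// Full matches stay on the main stream; timeouts are retrieved from the side output.
DataStream<String> timedOutMatches = matches.getSideOutput(timedOutTag);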
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class SocketWindowWordCount, method main.
public static void main(String[] args) throws Exception {
// the host and the port to connect to
final String hostname;
final int port;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
hostname = params.has("hostname") ? params.get("hostname") : "localhost";
port = params.getInt("port");
} catch (Exception e) {
System.err.println("No port specified. Please run 'SocketWindowWordCount " + "--hostname <hostname> --port <port>', where hostname (localhost by default) " + "and port is the address of the text server");
System.err.println("To start a simple text server, run 'netcat -l <port>' and " + "type the input text into the command line");
return;
}
// get the execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get input data by connecting to the socket
DataStream<String> text = env.socketTextStream(hostname, port, "\n");
// parse the data, group it, window it, and aggregate the counts
DataStream<WordWithCount> windowCounts = text
        .flatMap(
                (FlatMapFunction<String, WordWithCount>) (value, out) -> {
                    for (String word : value.split("\\s")) {
                        out.collect(new WordWithCount(word, 1L));
                    }
                },
                Types.POJO(WordWithCount.class))
        .keyBy(value -> value.word)
        .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
        .reduce((a, b) -> new WordWithCount(a.word, a.count + b.count))
        .returns(WordWithCount.class);
// print the results with a single thread, rather than in parallel
windowCounts.print().setParallelism(1);
env.execute("Socket Window WordCount");
}
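The WordWithCount type referenced above is a small POJO nested in the same example class; roughly:
// Data type for words with count (sketch of the POJO the example relies on).
public static class WordWithCount {

    public String word;
    public long count;

    // Flink's POJO serializer needs a public no-argument constructor.
    public WordWithCount() {}

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}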
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class GroupedProcessingTimeWindowExample, method main.
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());
stream.keyBy(value -> value.f0)
        .window(SlidingProcessingTimeWindows.of(Time.milliseconds(2500), Time.milliseconds(500)))
        .reduce(new SummingReducer())
        .addSink(new DiscardingSink<>());
env.execute();
}
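DataSource and SummingReducer are helper classes defined in the same example; the reducer simply sums the second tuple field per key, roughly:
// Sketch of the reducer used above: keeps the key and sums the second field.
private static class SummingReducer implements ReduceFunction<Tuple2<Long, Long>> {

    @Override
    public Tuple2<Long, Long> reduce(Tuple2<Long, Long> value1, Tuple2<Long, Long> value2) {
        return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
    }
}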