use of org.apache.flink.streaming.api.windowing.time.Time in project flink by apache.
the class NFACompiler method compileFactory.
/**
 * Compiles the given pattern into a {@link NFAFactory}. The NFA factory can be used to create
 * multiple NFAs.
 *
 * <p>The pattern chain is walked backwards via {@code getPrevious()}, so the pattern passed in
 * becomes the final state. Every pattern contributes one state; a TAKE transition guarded by the
 * succeeding pattern's filter function connects each state to its successor, and a
 * {@link FollowedByPattern} successor additionally adds a reflexive IGNORE transition.
 *
 * <p>The window time handed to the factory is the minimum of all window times declared anywhere
 * in the pattern chain, or {@code 0} if no pattern declares one.
 *
 * @param pattern Definition of sequence pattern; {@code null} yields a factory for empty NFAs
 * @param inputTypeSerializer Serializer for the input type
 * @param timeoutHandling True if the NFA shall return timed out event patterns
 * @param <T> Type of the input events
 * @return Factory for NFAs corresponding to the given pattern
 * @throws MalformedPatternException if two patterns in the chain share the same name
 */
@SuppressWarnings("unchecked")
public static <T> NFAFactory<T> compileFactory(Pattern<T, ?> pattern, TypeSerializer<T> inputTypeSerializer, boolean timeoutHandling) {
    if (pattern == null) {
        // return a factory for empty NFAs
        return new NFAFactoryImpl<T>(inputTypeSerializer, 0, Collections.<State<T>>emptyList(), timeoutHandling);
    }

    // set of all generated states
    Map<String, State<T>> states = new HashMap<>();
    // this is used to enforce pattern name uniqueness.
    Set<String> patternNames = new HashSet<>();

    Pattern<T, ?> succeedingPattern;
    State<T> succeedingState;
    Pattern<T, ?> currentPattern = pattern;

    // we're traversing the pattern from the end to the beginning --> the first state is the final state
    State<T> currentState = new State<>(currentPattern.getName(), State.StateType.Final);
    patternNames.add(currentPattern.getName());
    states.put(currentPattern.getName(), currentState);

    // Track separately whether any window time has been seen. Using 0 as the "undefined"
    // seed would make every later "smaller than" comparison fail, so a window declared on
    // an earlier pattern would be ignored whenever the last pattern has none.
    Time lastPatternWindowTime = currentPattern.getWindowTime();
    boolean windowTimeDefined = lastPatternWindowTime != null;
    long windowTime = windowTimeDefined ? lastPatternWindowTime.toMilliseconds() : 0L;

    while (currentPattern.getPrevious() != null) {
        succeedingPattern = currentPattern;
        succeedingState = currentState;
        currentPattern = currentPattern.getPrevious();

        if (!patternNames.add(currentPattern.getName())) {
            throw new MalformedPatternException("Duplicate pattern name: " + currentPattern.getName() + ". " + "Pattern names must be unique.");
        }

        Time currentWindowTime = currentPattern.getWindowTime();
        if (currentWindowTime != null) {
            long currentWindowMillis = currentWindowTime.toMilliseconds();
            if (!windowTimeDefined || currentWindowMillis < windowTime) {
                // the window time is the global minimum of all window times of each state
                windowTime = currentWindowMillis;
                windowTimeDefined = true;
            }
        }

        if (states.containsKey(currentPattern.getName())) {
            currentState = states.get(currentPattern.getName());
        } else {
            currentState = new State<>(currentPattern.getName(), State.StateType.Normal);
            states.put(currentState.getName(), currentState);
        }

        // moving to the succeeding state requires the succeeding pattern's filter to accept the event
        currentState.addStateTransition(new StateTransition<T>(StateTransitionAction.TAKE, succeedingState, (FilterFunction<T>) succeedingPattern.getFilterFunction()));

        if (succeedingPattern instanceof FollowedByPattern) {
            // the followed by pattern entails a reflexive ignore transition
            currentState.addStateTransition(new StateTransition<T>(StateTransitionAction.IGNORE, currentState, null));
        }
    }

    // add the beginning state
    final State<T> beginningState;
    if (states.containsKey(BEGINNING_STATE_NAME)) {
        beginningState = states.get(BEGINNING_STATE_NAME);
    } else {
        beginningState = new State<>(BEGINNING_STATE_NAME, State.StateType.Start);
        states.put(BEGINNING_STATE_NAME, beginningState);
    }

    // the start state leads into the first pattern of the chain (the last one visited above)
    beginningState.addStateTransition(new StateTransition<T>(StateTransitionAction.TAKE, currentState, (FilterFunction<T>) currentPattern.getFilterFunction()));

    return new NFAFactoryImpl<T>(inputTypeSerializer, windowTime, new HashSet<>(states.values()), timeoutHandling);
}
use of org.apache.flink.streaming.api.windowing.time.Time in project flink by apache.
the class StateDescriptorPassingTest method testApplyWindowAllState.
/**
 * Applies a no-op {@link AllWindowFunction} on a non-keyed tumbling event-time window over
 * {@link File} elements (registered with a Kryo serializer) and hands the resulting operator to
 * {@code validateListStateDescriptorConfigured}.
 */
@Test
public void testApplyWindowAllState() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    // simulate ingestion time: every element is stamped with the current wall-clock time
    final WatermarkStrategy<File> ingestionTimeStrategy =
            WatermarkStrategy.<File>forMonotonousTimestamps()
                    .withTimestampAssigner((file, ts) -> System.currentTimeMillis());

    final DataStream<File> files =
            env.fromElements(new File("/")).assignTimestampsAndWatermarks(ingestionTimeStrategy);

    // the window function intentionally does nothing; only the operator's configuration is checked
    final AllWindowFunction<File, String, TimeWindow> noOpWindowFunction =
            new AllWindowFunction<File, String, TimeWindow>() {
                @Override
                public void apply(TimeWindow window, Iterable<File> input, Collector<String> out) {
                }
            };

    final SingleOutputStreamOperator<?> windowed =
            files.windowAll(TumblingEventTimeWindows.of(Time.milliseconds(1000)))
                    .apply(noOpWindowFunction);

    validateListStateDescriptorConfigured(windowed);
}
use of org.apache.flink.streaming.api.windowing.time.Time in project flink by apache.
the class DataStreamJavaITCase method testFromAndToChangelogStreamEventTime.
/**
 * Round-trips a watermarked stream through the Table API as a changelog stream, carrying the
 * rowtime as a metadata column, and then checks that an event-time tumbling window on the
 * resulting stream produces the expected per-key sums.
 */
@Test
public void testFromAndToChangelogStreamEventTime() throws Exception {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

    final DataStream<Tuple3<Long, Integer, String>> watermarkedTuples = getWatermarkedDataStream();
    final DataStream<Row> insertOnlyRows =
            watermarkedTuples
                    .map(t -> Row.ofKind(RowKind.INSERT, t.f1, t.f2))
                    .returns(Types.ROW(Types.INT, Types.STRING));

    // derive physical columns and add a rowtime
    final Schema sourceSchema =
            Schema.newBuilder()
                    .columnByMetadata("rowtime", TIMESTAMP_LTZ(3))
                    .columnByExpression("computed", $("f1").upperCase())
                    .watermark("rowtime", sourceWatermark())
                    .build();
    final Table sourceTable = tableEnv.fromChangelogStream(insertOnlyRows, sourceSchema);
    tableEnv.createTemporaryView("t", sourceTable);

    // access and reorder columns
    final Table reorderedTable = tableEnv.sqlQuery("SELECT computed, rowtime, f0 FROM t");

    // write out the rowtime column with fully declared schema
    final Schema sinkSchema =
            Schema.newBuilder()
                    .column("f1", STRING())
                    .columnByMetadata("rowtime", TIMESTAMP_LTZ(3))
                    .columnByExpression("ignored", $("f1").upperCase())
                    .column("f0", INT())
                    .build();
    final DataStream<Row> changelogResult = tableEnv.toChangelogStream(reorderedTable, sinkSchema);

    // test event time window and field access
    final DataStream<Row> sumsPerKeyAndWindow =
            changelogResult
                    .keyBy(k -> k.getField("f1"))
                    .window(TumblingEventTimeWindows.of(Time.milliseconds(5)))
                    .<Row>apply((key, window, input, out) -> {
                        int total = 0;
                        for (Row element : input) {
                            total += element.<Integer>getFieldAs("f0");
                        }
                        out.collect(Row.of(key, total));
                    })
                    .returns(Types.ROW(Types.STRING, Types.INT));

    testResult(sumsPerKeyAndWindow, Row.of("A", 47), Row.of("C", 1000), Row.of("C", 1000));
}
use of org.apache.flink.streaming.api.windowing.time.Time in project flink by apache.
the class CoGroupedStreamsTest method testDelegateToCoGrouped.
/**
 * Checks that the allowed lateness configured on the co-grouped stream is the value reported by
 * the windowed stream it exposes once the co-group function has been applied.
 */
@Test
public void testDelegateToCoGrouped() {
    final Time configuredLateness = Time.milliseconds(42L);

    final CoGroupedStreams.WithWindow<String, String, String, TimeWindow> coGrouped =
            dataStream1
                    .coGroup(dataStream2)
                    .where(keySelector)
                    .equalTo(keySelector)
                    .window(tsAssigner)
                    .allowedLateness(configuredLateness);

    // apply the co-group function first, then inspect the windowed stream
    coGrouped.apply(coGroupFunction, BasicTypeInfo.STRING_TYPE_INFO);

    final long expectedMillis = configuredLateness.toMilliseconds();
    final long actualMillis = coGrouped.getWindowedStream().getAllowedLateness();
    Assert.assertEquals(expectedMillis, actualMillis);
}
use of org.apache.flink.streaming.api.windowing.time.Time in project flink by apache.
the class FlinkKinesisConsumerTest method testPeriodicWatermark.
/**
 * Runs a {@link FlinkKinesisConsumer} against two in-memory shard queues and checks the
 * periodic watermarks it emits.
 *
 * <p>Per the expectations asserted at the end, each emitted watermark equals the minimum of the
 * latest timestamps seen on the two shards minus {@code maxOutOfOrderness}: first
 * {@code min(2, 10) - 5 = -3}, then {@code min(10, 10) - 5 = 5}. Watermark emission is driven
 * by advancing the test harness' processing time by {@code autoWatermarkInterval}.
 */
@Test
public void testPeriodicWatermark() throws Exception {
String streamName = "fakeStreamName";
Time maxOutOfOrderness = Time.milliseconds(5);
long autoWatermarkInterval = 1_000;
HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
Properties props = new Properties();
props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
// one blocking queue per mocked shard; records put into a queue are served as that shard's data
BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
// override createFetcher to mock Kinesis
FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
@Override
protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
// the fetcher reads from the blocking queues above instead of a real Kinesis endpoint
KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), null, new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {
};
return fetcher;
}
};
// assign shards by the numeric suffix of their shard id
sourceFunc.setShardAssigner((streamShardHandle, i) -> {
// shardId-000000000000
return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
});
sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
// there is currently no test harness specifically for sources,
// so we overlay the source thread here
AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
testHarness.initializeEmptyState();
testHarness.open();
// watermarks are captured here (written by the source thread, read by the test thread)
ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
@SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {
@Override
public void emitWatermark(Watermark mark) {
watermarks.add(mark);
}
@Override
public void markAsTemporarilyIdle() {
// intentionally a no-op in this test
}
};
// run the source on its own thread; the test thread feeds the shard queues and advances time
new Thread(() -> {
try {
sourceFunc.run(sourceContext);
} catch (InterruptedException e) {
// expected on cancel
} catch (Exception e) {
throw new RuntimeException(e);
}
}).start();
shard1.put("1");
shard1.put("2");
shard2.put("10");
int recordCount = 3;
int watermarkCount = 0;
// block until all records queued so far have reached the harness output
awaitRecordCount(testHarness.getOutput(), recordCount);
// Trigger watermark emit, first watermark is -3
// - Shard-1 @2
// - Shard-2 @10
// - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
watermarkCount++;
// advance watermark
shard1.put("10");
recordCount++;
awaitRecordCount(testHarness.getOutput(), recordCount);
// Trigger watermark emit, second watermark is 5
// - Shard-1 @10
// - Shard-2 @10
// - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
watermarkCount++;
sourceFunc.cancel();
testHarness.close();
assertEquals("record count", recordCount, testHarness.getOutput().size());
assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
assertEquals("watermark count", watermarkCount, watermarks.size());
}
Aggregations