Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.
The class StreamExecWindowAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);

    // Hopping windows require an additional COUNT(*) to decide, based on whether the
    // currently fired window is empty, whether to register the next timer;
    // see SliceSharedWindowAggProcessor.
    final AggregateInfoList aggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    true); // isStateBackendDataViews

    final GeneratedNamespaceAggsHandleFunction<Long> generatedAggsHandler =
            createAggsHandler(
                    sliceAssigner,
                    aggInfoList,
                    config,
                    planner.getRelBuilder(),
                    inputRowType.getChildren(),
                    shiftTimeZone);

    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    final LogicalType[] accTypes = convertToLogicalTypes(aggInfoList.getAccTypes());

    final OneInputStreamOperator<RowData, RowData> windowOperator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(inputRowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(
                            (PagedTypeSerializer<RowData>)
                                    selector.getProducedType().toSerializer())
                    .assigner(sliceAssigner)
                    .countStarIndex(aggInfoList.getIndexOfCountStar())
                    .aggregate(generatedAggsHandler, new RowDataSerializer(accTypes))
                    .build();

    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(windowOperator),
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);

    // set the key type and selector for state
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
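The operator built above drives all window bookkeeping through the SliceAssigner abstraction. For orientation, the core of that interface looks roughly as follows; this is a sketch based on the flink-table-runtime module, and the exact method set may differ between Flink versions.

// Sketch of the SliceAssigner contract; signatures may differ across Flink versions.
public interface SliceAssigner extends Serializable {

    /** Returns the end timestamp of the slice the given element belongs to. */
    long assignSliceEnd(RowData element, ClockService clock);

    /** Returns the end timestamp of the last window the given slice belongs to. */
    long getLastWindowEnd(long sliceEnd);

    /** Returns the start timestamp of the window with the given end timestamp. */
    long getWindowStart(long windowEnd);

    /** Returns the slices that can be expired once the window with the given end fires. */
    Iterable<Long> expiredSlices(long windowEnd);

    /** Returns the interval between consecutive slice ends, i.e. the slice size in millis. */
    long getSliceEndInterval();

    /** Whether the assigner works on event time (rowtime) rather than processing time. */
    boolean isEventTime();
}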
Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.
The class StreamExecWindowAggregateBase, method createSliceAssigner:
// ------------------------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------------------------

protected SliceAssigner createSliceAssigner(
        WindowingStrategy windowingStrategy, ZoneId shiftTimeZone) {
    WindowSpec windowSpec = windowingStrategy.getWindow();
    if (windowingStrategy instanceof WindowAttachedWindowingStrategy) {
        int windowEndIndex =
                ((WindowAttachedWindowingStrategy) windowingStrategy).getWindowEnd();
        // we don't need a time attribute to assign windows, use a magic value in this case
        SliceAssigner innerAssigner =
                createSliceAssigner(windowSpec, Integer.MAX_VALUE, shiftTimeZone);
        return SliceAssigners.windowed(windowEndIndex, innerAssigner);
    } else if (windowingStrategy instanceof SliceAttachedWindowingStrategy) {
        int sliceEndIndex =
                ((SliceAttachedWindowingStrategy) windowingStrategy).getSliceEnd();
        // we don't need a time attribute to assign windows, use a magic value in this case
        SliceAssigner innerAssigner =
                createSliceAssigner(windowSpec, Integer.MAX_VALUE, shiftTimeZone);
        return SliceAssigners.sliced(sliceEndIndex, innerAssigner);
    } else if (windowingStrategy instanceof TimeAttributeWindowingStrategy) {
        final int timeAttributeIndex;
        if (windowingStrategy.isRowtime()) {
            timeAttributeIndex =
                    ((TimeAttributeWindowingStrategy) windowingStrategy)
                            .getTimeAttributeIndex();
        } else {
            timeAttributeIndex = -1;
        }
        return createSliceAssigner(windowSpec, timeAttributeIndex, shiftTimeZone);
    } else {
        throw new UnsupportedOperationException(windowingStrategy + " is not supported yet.");
    }
}
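The overload createSliceAssigner(WindowSpec, int, ZoneId) called above is not shown on this page. The following is a plausible sketch of how it dispatches on the concrete WindowSpec subtypes via the SliceAssigners factory methods; details such as window-offset handling vary across Flink versions, so treat this as an approximation rather than the actual source.

// Approximate sketch of the WindowSpec-level overload; the real method in
// StreamExecWindowAggregateBase may differ between Flink versions.
protected SliceAssigner createSliceAssigner(
        WindowSpec windowSpec, int timeAttributeIndex, ZoneId shiftTimeZone) {
    if (windowSpec instanceof TumblingWindowSpec) {
        TumblingWindowSpec tumbling = (TumblingWindowSpec) windowSpec;
        return SliceAssigners.tumbling(timeAttributeIndex, shiftTimeZone, tumbling.getSize());
    } else if (windowSpec instanceof HoppingWindowSpec) {
        HoppingWindowSpec hopping = (HoppingWindowSpec) windowSpec;
        return SliceAssigners.hopping(
                timeAttributeIndex, shiftTimeZone, hopping.getSize(), hopping.getSlide());
    } else if (windowSpec instanceof CumulativeWindowSpec) {
        CumulativeWindowSpec cumulative = (CumulativeWindowSpec) windowSpec;
        return SliceAssigners.cumulative(
                timeAttributeIndex, shiftTimeZone, cumulative.getMaxSize(), cumulative.getStep());
    } else {
        throw new UnsupportedOperationException(windowSpec + " is not supported yet.");
    }
}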
Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.
The class StreamExecGlobalWindowAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);

    final AggregateInfoList localAggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    localAggInputRowType, // should use the original input here
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false); // isStateBackendDataViews
    final AggregateInfoList globalAggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    localAggInputRowType, // should use the original input here
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    true); // isStateBackendDataViews

    // handler used to merge multiple local accumulators into one accumulator,
    // where all accumulators are held in memory
    final GeneratedNamespaceAggsHandleFunction<Long> localAggsHandler =
            createAggsHandler(
                    "LocalWindowAggsHandler",
                    sliceAssigner,
                    localAggInfoList,
                    grouping.length,
                    true,
                    localAggInfoList.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftTimeZone);

    // handler used to merge the single local accumulator (in memory) into the state accumulator
    final GeneratedNamespaceAggsHandleFunction<Long> globalAggsHandler =
            createAggsHandler(
                    "GlobalWindowAggsHandler",
                    sliceAssigner,
                    globalAggInfoList,
                    0,
                    true,
                    localAggInfoList.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftTimeZone);

    // handler used to merge state accumulators when merging slices into a window,
    // e.g. for Hop and Cumulate windows
    final GeneratedNamespaceAggsHandleFunction<Long> stateAggsHandler =
            createAggsHandler(
                    "StateWindowAggsHandler",
                    sliceAssigner,
                    globalAggInfoList,
                    0,
                    false,
                    globalAggInfoList.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftTimeZone);

    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    final LogicalType[] accTypes = convertToLogicalTypes(globalAggInfoList.getAccTypes());

    final OneInputStreamOperator<RowData, RowData> windowOperator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(inputRowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(
                            (PagedTypeSerializer<RowData>)
                                    selector.getProducedType().toSerializer())
                    .assigner(sliceAssigner)
                    .countStarIndex(globalAggInfoList.getIndexOfCountStar())
                    .globalAggregate(
                            localAggsHandler,
                            globalAggsHandler,
                            stateAggsHandler,
                            new RowDataSerializer(accTypes))
                    .build();

    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(GLOBAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(windowOperator),
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);

    // set the key type and selector for state
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
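Taken together, the three handlers form a two-phase merge pipeline. The following is a hypothetical illustration of the order in which they could be applied for one key; the driver method, parameter names, and control flow are invented for illustration, and the real logic lives inside the slicing window operator and its processors.

// Hypothetical illustration only; not the actual SlicingWindowOperator code.
static void mergeChainSketch(
        NamespaceAggsHandleFunction<Long> localAggsHandler,
        NamespaceAggsHandleFunction<Long> globalAggsHandler,
        NamespaceAggsHandleFunction<Long> stateAggsHandler,
        long sliceEnd,
        Iterable<RowData> shuffledLocalAccs, // invented: buffered local accumulators
        RowData stateAcc) // invented: the current keyed-state accumulator
        throws Exception {
    // Stage 1: combine the buffered local accumulators purely in memory.
    localAggsHandler.setAccumulators(sliceEnd, localAggsHandler.createAccumulators());
    for (RowData localAcc : shuffledLocalAccs) {
        localAggsHandler.merge(sliceEnd, localAcc);
    }
    // Stage 2: fold the single combined accumulator into the keyed-state accumulator.
    globalAggsHandler.setAccumulators(sliceEnd, stateAcc);
    globalAggsHandler.merge(sliceEnd, localAggsHandler.getAccumulators());
    // Stage 3 (hopping/cumulative only): when a window fires, stateAggsHandler merges
    // the state accumulators of all slices covered by that window.
}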
Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.
The class SlicingWindowAggOperatorTest, method testProcessingTimeHoppingWindows:
@Test
public void testProcessingTimeHoppingWindows() throws Exception {
    final SliceAssigner assigner =
            SliceAssigners.hopping(-1, shiftTimeZone, Duration.ofHours(3), Duration.ofHours(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .countStarIndex(1)
                    .build();

    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();

    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();

    // timestamp is ignored in processing time
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T00:00:00.003"));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));

    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T01:00:00"));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1969-12-31T22:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));

    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T02:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1969-12-31T23:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T02:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));

    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T03:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));

    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T07:00:00"));
    expectedOutput.add(insertRecord("key2", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T04:00:00")));
    expectedOutput.add(insertRecord("key1", 5L, 5L, epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T04:00:00")));
    expectedOutput.add(insertRecord("key1", 5L, 5L, epochMills(UTC_ZONE_ID, "1970-01-01T02:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T05:00:00")));
    expectedOutput.add(insertRecord("key1", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T06:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
}
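The test converts wall-clock strings into epoch milliseconds through an epochMills helper. A minimal sketch of what such a helper can look like, as a hypothetical reimplementation rather than the test's actual utility:

// Hypothetical helper: interpret a local date-time string in the given zone
// and return the corresponding epoch milliseconds.
private static long epochMills(ZoneId zone, String localDateTime) {
    return LocalDateTime.parse(localDateTime).atZone(zone).toInstant().toEpochMilli();
}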
Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.
The class SlicingWindowAggOperatorTest, method testEventTimeHoppingWindows:
@Test
public void testEventTimeHoppingWindows() throws Exception {
    final SliceAssigner assigner =
            SliceAssigners.hopping(2, shiftTimeZone, Duration.ofSeconds(3), Duration.ofSeconds(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .countStarIndex(1)
                    .build();

    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();

    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();

    // add elements out-of-order
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3000L)));

    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(20L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(0L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(999L)));

    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1998L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1000L)));

    testHarness.processWatermark(new Watermark(999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(-2000L), localMills(1000L)));
    expectedOutput.add(new Watermark(999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(-1000L), localMills(2000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(-1000L), localMills(2000L)));
    expectedOutput.add(new Watermark(1999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(new Watermark(2999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    // do a snapshot, close and restore again
    testHarness.prepareSnapshotPreBarrier(0L);
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
    expectedOutput.clear();

    testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.initializeState(snapshot);
    testHarness.open();

    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(insertRecord("key2", 5L, 5L, localMills(1000L), localMills(4000L)));
    expectedOutput.add(new Watermark(3999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    // late element for [1K, 4K), but should be accumulated into [2K, 5K), [3K, 6K)
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3500L)));

    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(2000L), localMills(5000L)));
    expectedOutput.add(new Watermark(4999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    // late for all assigned windows, should be dropped
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(2999L)));

    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L)));
    expectedOutput.add(new Watermark(5999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    // those don't have any effect...
    testHarness.processWatermark(new Watermark(6999));
    testHarness.processWatermark(new Watermark(7999));
    expectedOutput.add(new Watermark(6999));
    expectedOutput.add(new Watermark(7999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());

    assertEquals(1, operator.getNumLateRecordsDropped().getCount());

    testHarness.close();
}
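The late-element expectations follow directly from the hopping geometry (size 3 s, slide 1 s): an element at 3500 ms belongs to windows [1K, 4K), [2K, 5K) and [3K, 6K), and once the watermark has passed 3999 only the latter two can still accumulate it. A small self-contained illustration of that arithmetic, in plain Java independent of the Flink classes and assuming non-negative timestamps:

// For a hopping window of the given size and slide (both in ms), list the
// start timestamps of all windows [start, start + size) containing a timestamp.
static List<Long> windowStartsContaining(long timestamp, long size, long slide) {
    List<Long> starts = new ArrayList<>();
    long lastStart = timestamp - (timestamp % slide); // start of the latest such window
    for (long start = lastStart; start > timestamp - size; start -= slide) {
        starts.add(start);
    }
    return starts;
}
// windowStartsContaining(3500, 3000, 1000) -> [3000, 2000, 1000],
// i.e. windows [3K, 6K), [2K, 5K) and [1K, 4K).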