Example 1 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

From class StreamExecWindowAggregate, method translateToPlanInternal:

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);
    // Hopping windows require an additional COUNT(*) to decide whether to register the
    // next timer, based on whether the window that just fired is empty; see
    // SliceSharedWindowAggProcessor.
    final AggregateInfoList aggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    true); // isStateBackendDataViews
    final GeneratedNamespaceAggsHandleFunction<Long> generatedAggsHandler =
            createAggsHandler(
                    sliceAssigner, aggInfoList, config, planner.getRelBuilder(),
                    inputRowType.getChildren(), shiftTimeZone);
    final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    final LogicalType[] accTypes = convertToLogicalTypes(aggInfoList.getAccTypes());
    final OneInputStreamOperator<RowData, RowData> windowOperator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(inputRowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(
                            (PagedTypeSerializer<RowData>) selector.getProducedType().toSerializer())
                    .assigner(sliceAssigner)
                    .countStarIndex(aggInfoList.getIndexOfCountStar())
                    .aggregate(generatedAggsHandler, new RowDataSerializer(accTypes))
                    .build();
    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(windowOperator),
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);
    // set KeyType and Selector for state
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
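
The countStarIndex(...) wiring above feeds the COUNT(*) trick described in the comment. A minimal, Flink-free sketch of the idea (the real logic lives in SliceSharedWindowAggProcessor; the class name and accumulator layout here are purely illustrative):

// Illustrative sketch only: models how a fired window's COUNT(*) accumulator
// can decide whether the next hopping-window timer is worth registering.
public final class CountStarSketch {
    public static void main(String[] args) {
        long[] accumulator = {42L, 3L}; // assumed layout for this sketch: [sum, count(*)]
        int countStarIndex = 1; // corresponds to countStarIndex(...) passed to the builder
        if (accumulator[countStarIndex] > 0) {
            System.out.println("fired window was non-empty -> register a timer for the next window");
        } else {
            System.out.println("fired window was empty -> skip the next timer for this key");
        }
    }
}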

Example 2 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

From class StreamExecWindowAggregateBase, method createSliceAssigner:

// ------------------------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------------------------
protected SliceAssigner createSliceAssigner(WindowingStrategy windowingStrategy, ZoneId shiftTimeZone) {
    WindowSpec windowSpec = windowingStrategy.getWindow();
    if (windowingStrategy instanceof WindowAttachedWindowingStrategy) {
        int windowEndIndex = ((WindowAttachedWindowingStrategy) windowingStrategy).getWindowEnd();
        // we don't need the time attribute to assign windows, so we use a magic value here
        SliceAssigner innerAssigner = createSliceAssigner(windowSpec, Integer.MAX_VALUE, shiftTimeZone);
        return SliceAssigners.windowed(windowEndIndex, innerAssigner);
    } else if (windowingStrategy instanceof SliceAttachedWindowingStrategy) {
        int sliceEndIndex = ((SliceAttachedWindowingStrategy) windowingStrategy).getSliceEnd();
        // we don't need the time attribute to assign windows, so we use a magic value here
        SliceAssigner innerAssigner = createSliceAssigner(windowSpec, Integer.MAX_VALUE, shiftTimeZone);
        return SliceAssigners.sliced(sliceEndIndex, innerAssigner);
    } else if (windowingStrategy instanceof TimeAttributeWindowingStrategy) {
        final int timeAttributeIndex;
        if (windowingStrategy.isRowtime()) {
            timeAttributeIndex = ((TimeAttributeWindowingStrategy) windowingStrategy).getTimeAttributeIndex();
        } else {
            timeAttributeIndex = -1;
        }
        return createSliceAssigner(windowSpec, timeAttributeIndex, shiftTimeZone);
    } else {
        throw new UnsupportedOperationException(windowingStrategy + " is not supported yet.");
    }
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) TimeAttributeWindowingStrategy(org.apache.flink.table.planner.plan.logical.TimeAttributeWindowingStrategy) WindowAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) SliceAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.SliceAttachedWindowingStrategy) TumblingWindowSpec(org.apache.flink.table.planner.plan.logical.TumblingWindowSpec) WindowSpec(org.apache.flink.table.planner.plan.logical.WindowSpec) HoppingWindowSpec(org.apache.flink.table.planner.plan.logical.HoppingWindowSpec) CumulativeWindowSpec(org.apache.flink.table.planner.plan.logical.CumulativeWindowSpec)
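
All three branches above delegate to an overload createSliceAssigner(WindowSpec, int, ZoneId) that dispatches on the concrete WindowSpec. A hedged sketch of that overload, reconstructed from the TumblingWindowSpec, HoppingWindowSpec, and CumulativeWindowSpec imports listed above; the actual Flink source may differ in details (for example, it may also validate that a hop's size is a multiple of its slide):

// Hedged reconstruction, not verbatim Flink source: map each WindowSpec to the
// matching SliceAssigners factory; the getter names are assumptions about the spec types.
protected SliceAssigner createSliceAssigner(
        WindowSpec windowSpec, int timeAttributeIndex, ZoneId shiftTimeZone) {
    if (windowSpec instanceof TumblingWindowSpec) {
        Duration size = ((TumblingWindowSpec) windowSpec).getSize();
        return SliceAssigners.tumbling(timeAttributeIndex, shiftTimeZone, size);
    } else if (windowSpec instanceof HoppingWindowSpec) {
        HoppingWindowSpec hop = (HoppingWindowSpec) windowSpec;
        return SliceAssigners.hopping(
                timeAttributeIndex, shiftTimeZone, hop.getSize(), hop.getSlide());
    } else if (windowSpec instanceof CumulativeWindowSpec) {
        CumulativeWindowSpec cumulate = (CumulativeWindowSpec) windowSpec;
        return SliceAssigners.cumulative(
                timeAttributeIndex, shiftTimeZone, cumulate.getMaxSize(), cumulate.getStep());
    } else {
        throw new UnsupportedOperationException(windowSpec + " is not supported yet.");
    }
}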

Example 3 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

From class StreamExecGlobalWindowAggregate, method translateToPlanInternal:

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);
    final AggregateInfoList localAggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    localAggInputRowType, // should use original input here
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false); // isStateBackendDataViews
    final AggregateInfoList globalAggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    localAggInputRowType, // should use original input here
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    true); // isStateBackendDataViews
    // handler used to merge multiple local accumulators into one accumulator,
    // where the accumulators are all held in memory
    final GeneratedNamespaceAggsHandleFunction<Long> localAggsHandler =
            createAggsHandler(
                    "LocalWindowAggsHandler", sliceAssigner, localAggInfoList,
                    grouping.length, true, localAggInfoList.getAccTypes(),
                    config, planner.getRelBuilder(), shiftTimeZone);
    // handler used to merge the single local accumulator (in memory) into the state accumulator
    final GeneratedNamespaceAggsHandleFunction<Long> globalAggsHandler =
            createAggsHandler(
                    "GlobalWindowAggsHandler", sliceAssigner, globalAggInfoList,
                    0, true, localAggInfoList.getAccTypes(),
                    config, planner.getRelBuilder(), shiftTimeZone);
    // handler used to merge state accumulators when merging slices into a window,
    // e.g. for Hop and Cumulate windows
    final GeneratedNamespaceAggsHandleFunction<Long> stateAggsHandler =
            createAggsHandler(
                    "StateWindowAggsHandler", sliceAssigner, globalAggInfoList,
                    0, false, globalAggInfoList.getAccTypes(),
                    config, planner.getRelBuilder(), shiftTimeZone);
    final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    final LogicalType[] accTypes = convertToLogicalTypes(globalAggInfoList.getAccTypes());
    final OneInputStreamOperator<RowData, RowData> windowOperator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(inputRowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(
                            (PagedTypeSerializer<RowData>) selector.getProducedType().toSerializer())
                    .assigner(sliceAssigner)
                    .countStarIndex(globalAggInfoList.getIndexOfCountStar())
                    .globalAggregate(
                            localAggsHandler,
                            globalAggsHandler,
                            stateAggsHandler,
                            new RowDataSerializer(accTypes))
                    .build();
    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(GLOBAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(windowOperator),
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);
    // set KeyType and Selector for state
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)
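
The three handlers implement a two-phase (local/global) window aggregation: upstream tasks pre-aggregate into in-memory accumulators, the global operator merges those into one, and the result is folded into the accumulator kept in keyed state. A Flink-free sketch of that merge flow with a hypothetical sum-and-count accumulator (uses Java 16+ records; purely illustrative, not the generated handlers' code):

import java.util.List;

public final class TwoPhaseMergeSketch {
    // hypothetical accumulator standing in for the generated aggs handlers' state
    record Acc(long sum, long count) {
        Acc merge(Acc other) { return new Acc(sum + other.sum, count + other.count); }
    }

    public static void main(String[] args) {
        // "LocalWindowAggsHandler": pre-aggregated accumulators arriving from upstream tasks
        List<Acc> localAccs = List.of(new Acc(3, 2), new Acc(7, 4));
        // "GlobalWindowAggsHandler": merge the local accumulators into a single one
        Acc merged = localAccs.stream().reduce(new Acc(0, 0), Acc::merge);
        // "StateWindowAggsHandler": merge that result into the accumulator kept in state
        Acc state = new Acc(10, 5).merge(merged);
        System.out.println(state); // Acc[sum=20, count=11]
    }
}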

Example 4 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

From class SlicingWindowAggOperatorTest, method testProcessingTimeHoppingWindows:

@Test
public void testProcessingTimeHoppingWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.hopping(-1, shiftTimeZone, Duration.ofHours(3), Duration.ofHours(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .countStarIndex(1)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    // timestamp is ignored in processing time
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T00:00:00.003"));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T01:00:00"));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1969-12-31T22:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T02:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1969-12-31T23:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T02:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T03:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T07:00:00"));
    expectedOutput.add(insertRecord("key2", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T04:00:00")));
    expectedOutput.add(insertRecord("key1", 5L, 5L, epochMills(UTC_ZONE_ID, "1970-01-01T01:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T04:00:00")));
    expectedOutput.add(insertRecord("key1", 5L, 5L, epochMills(UTC_ZONE_ID, "1970-01-01T02:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T05:00:00")));
    expectedOutput.add(insertRecord("key1", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T03:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T06:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)
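
The test reads times through helpers like epochMills(zone, dateTime), which interpret a local date-time in the given zone. A plausible re-implementation, shown as an assumption about the test utility rather than the actual Flink helper:

import java.time.LocalDateTime;
import java.time.ZoneId;

public final class EpochMillsSketch {
    /** Assumed behavior: interprets dateTime as a local time in zone, returns epoch millis. */
    static long epochMills(ZoneId zone, String dateTime) {
        return LocalDateTime.parse(dateTime).atZone(zone).toInstant().toEpochMilli();
    }

    public static void main(String[] args) {
        // with shiftTimeZone = UTC this is simply the epoch offset of the timestamp
        System.out.println(epochMills(ZoneId.of("UTC"), "1970-01-01T01:00:00")); // 3600000
    }
}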

Example 5 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

From class SlicingWindowAggOperatorTest, method testEventTimeHoppingWindows:

@Test
public void testEventTimeHoppingWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.hopping(2, shiftTimeZone, Duration.ofSeconds(3), Duration.ofSeconds(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .countStarIndex(1)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    // add elements out-of-order
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(20L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(0L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1998L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1000L)));
    testHarness.processWatermark(new Watermark(999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(-2000L), localMills(1000L)));
    expectedOutput.add(new Watermark(999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(-1000L), localMills(2000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(-1000L), localMills(2000L)));
    expectedOutput.add(new Watermark(1999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(new Watermark(2999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // do a snapshot, close and restore again
    testHarness.prepareSnapshotPreBarrier(0L);
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
    expectedOutput.clear();
    testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(insertRecord("key2", 5L, 5L, localMills(1000L), localMills(4000L)));
    expectedOutput.add(new Watermark(3999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late element for [1K, 4K), but should be accumulated into [2K, 5K), [3K, 6K)
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3500L)));
    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(2000L), localMills(5000L)));
    expectedOutput.add(new Watermark(4999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late for all assigned windows, should be dropped
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(2999L)));
    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L)));
    expectedOutput.add(new Watermark(5999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // those don't have any effect...
    testHarness.processWatermark(new Watermark(6999));
    testHarness.processWatermark(new Watermark(7999));
    expectedOutput.add(new Watermark(6999));
    expectedOutput.add(new Watermark(7999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    assertEquals(1, operator.getNumLateRecordsDropped().getCount());
    testHarness.close();
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)
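
To see why the late element at 3500 ms is dropped from [1000, 4000) but still accumulated into [2000, 5000) and [3000, 6000), it helps to enumerate the hopping windows that cover a timestamp. A standalone sketch of that enumeration (not Flink API, just the windowing arithmetic):

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

/** Hedged helper (not part of Flink): lists the hopping windows covering a timestamp. */
public final class HoppingWindowsDemo {
    static List<long[]> windowsFor(long ts, Duration size, Duration slide) {
        long sizeMs = size.toMillis();
        long slideMs = slide.toMillis();
        List<long[]> windows = new ArrayList<>();
        // start of the last window that begins at or before ts
        long lastStart = ts - Math.floorMod(ts, slideMs);
        for (long start = lastStart; start > ts - sizeMs; start -= slideMs) {
            windows.add(new long[] {start, start + sizeMs});
        }
        return windows;
    }

    public static void main(String[] args) {
        // element at 3500 ms, size 3 s, slide 1 s -> [3000,6000), [2000,5000), [1000,4000)
        for (long[] w : windowsFor(3500L, Duration.ofSeconds(3), Duration.ofSeconds(1))) {
            System.out.println("[" + w[0] + ", " + w[1] + ")");
        }
        // once the watermark has passed 3999, [1000,4000) has already fired, so a late
        // element at 3500 only contributes to the still-open windows [2000,5000) and [3000,6000)
    }
}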

Aggregations

SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) 11
RowData (org.apache.flink.table.data.RowData) 9
Test (org.junit.Test) 7
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 6
GenericRowData (org.apache.flink.table.data.GenericRowData) 6
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData) 6
ZoneId (java.time.ZoneId) 3
Transformation (org.apache.flink.api.dag.Transformation) 3
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState) 3
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 3
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) 3
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList) 3
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector) 3
RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer) 3
RowType (org.apache.flink.table.types.logical.RowType) 3
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 2
LogicalType (org.apache.flink.table.types.logical.LogicalType) 2
CumulativeWindowSpec (org.apache.flink.table.planner.plan.logical.CumulativeWindowSpec) 1
HoppingWindowSpec (org.apache.flink.table.planner.plan.logical.HoppingWindowSpec) 1
SliceAttachedWindowingStrategy (org.apache.flink.table.planner.plan.logical.SliceAttachedWindowingStrategy) 1