
Example 1 with GeneratedJoinCondition

use of org.apache.flink.table.runtime.generated.GeneratedJoinCondition in project flink by apache.

the class BatchExecSortMergeJoin method translateToPlanInternal.

@Override
@SuppressWarnings("unchecked")
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    ExecEdge leftInputEdge = getInputEdges().get(0);
    ExecEdge rightInputEdge = getInputEdges().get(1);
    // get input types
    RowType leftType = (RowType) leftInputEdge.getOutputType();
    RowType rightType = (RowType) rightInputEdge.getOutputType();
    LogicalType[] keyFieldTypes = IntStream.of(leftKeys).mapToObj(leftType::getTypeAt).toArray(LogicalType[]::new);
    RowType keyType = RowType.of(keyFieldTypes);
    GeneratedJoinCondition condFunc = JoinUtil.generateConditionFunction(config.getTableConfig(), nonEquiCondition, leftType, rightType);
    long externalBufferMemory = config.get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY).getBytes();
    long sortMemory = config.get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_SORT_MEMORY).getBytes();
    int externalBufferNum = 1;
    if (joinType == FlinkJoinType.FULL) {
        externalBufferNum = 2;
    }
    long managedMemory = externalBufferMemory * externalBufferNum + sortMemory * 2;
    SortCodeGenerator leftSortGen = newSortGen(config, leftKeys, leftType);
    SortCodeGenerator rightSortGen = newSortGen(config, rightKeys, rightType);
    int[] keyPositions = IntStream.range(0, leftKeys.length).toArray();
    SortMergeJoinOperator operator =
            new SortMergeJoinOperator(
                    1.0 * externalBufferMemory / managedMemory,
                    joinType,
                    leftIsSmaller,
                    condFunc,
                    ProjectionCodeGenerator.generateProjection(
                            new CodeGeneratorContext(config.getTableConfig()),
                            "SMJProjection",
                            leftType,
                            keyType,
                            leftKeys),
                    ProjectionCodeGenerator.generateProjection(
                            new CodeGeneratorContext(config.getTableConfig()),
                            "SMJProjection",
                            rightType,
                            keyType,
                            rightKeys),
                    leftSortGen.generateNormalizedKeyComputer("LeftComputer"),
                    leftSortGen.generateRecordComparator("LeftComparator"),
                    rightSortGen.generateNormalizedKeyComputer("RightComputer"),
                    rightSortGen.generateRecordComparator("RightComparator"),
                    newSortGen(config, keyPositions, keyType).generateRecordComparator("KeyComparator"),
                    filterNulls);
    Transformation<RowData> leftInputTransform = (Transformation<RowData>) leftInputEdge.translateToPlan(planner);
    Transformation<RowData> rightInputTransform = (Transformation<RowData>) rightInputEdge.translateToPlan(planner);
    return ExecNodeUtil.createTwoInputTransformation(
            leftInputTransform,
            rightInputTransform,
            createTransformationName(config),
            createTransformationDescription(config),
            SimpleOperatorFactory.of(operator),
            InternalTypeInfo.of(getOutputType()),
            rightInputTransform.getParallelism(),
            managedMemory);
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) RowData(org.apache.flink.table.data.RowData) SortMergeJoinOperator(org.apache.flink.table.runtime.operators.join.SortMergeJoinOperator) SortCodeGenerator(org.apache.flink.table.planner.codegen.sort.SortCodeGenerator)
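The sort-merge join above sizes its managed memory from two ExecutionConfigOptions. A minimal sketch, not taken from the Flink sources above, of setting those options on a batch TableEnvironment (assumes org.apache.flink.table.api.TableEnvironment, EnvironmentSettings, and org.apache.flink.configuration.MemorySize; the sizes are only illustrative):

TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
// external buffer memory, read above via TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY
tEnv.getConfig().getConfiguration()
        .set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY, MemorySize.parse("64 mb"));
// sort buffer memory, read above via TABLE_EXEC_RESOURCE_SORT_MEMORY
tEnv.getConfig().getConfiguration()
        .set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_SORT_MEMORY, MemorySize.parse("128 mb"));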

Example 2 with GeneratedJoinCondition

use of org.apache.flink.table.runtime.generated.GeneratedJoinCondition in project flink by apache.

the class BatchExecHashJoin method translateToPlanInternal.

@Override
@SuppressWarnings("unchecked")
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    ExecEdge leftInputEdge = getInputEdges().get(0);
    ExecEdge rightInputEdge = getInputEdges().get(1);
    Transformation<RowData> leftInputTransform = (Transformation<RowData>) leftInputEdge.translateToPlan(planner);
    Transformation<RowData> rightInputTransform = (Transformation<RowData>) rightInputEdge.translateToPlan(planner);
    // get input types
    RowType leftType = (RowType) leftInputEdge.getOutputType();
    RowType rightType = (RowType) rightInputEdge.getOutputType();
    JoinUtil.validateJoinSpec(joinSpec, leftType, rightType, false);
    int[] leftKeys = joinSpec.getLeftKeys();
    int[] rightKeys = joinSpec.getRightKeys();
    LogicalType[] keyFieldTypes = IntStream.of(leftKeys).mapToObj(leftType::getTypeAt).toArray(LogicalType[]::new);
    RowType keyType = RowType.of(keyFieldTypes);
    GeneratedJoinCondition condFunc = JoinUtil.generateConditionFunction(config.getTableConfig(), joinSpec.getNonEquiCondition().orElse(null), leftType, rightType);
    // projection for equals
    GeneratedProjection leftProj = ProjectionCodeGenerator.generateProjection(new CodeGeneratorContext(config.getTableConfig()), "HashJoinLeftProjection", leftType, keyType, leftKeys);
    GeneratedProjection rightProj = ProjectionCodeGenerator.generateProjection(new CodeGeneratorContext(config.getTableConfig()), "HashJoinRightProjection", rightType, keyType, rightKeys);
    Transformation<RowData> buildTransform;
    Transformation<RowData> probeTransform;
    GeneratedProjection buildProj;
    GeneratedProjection probeProj;
    int[] buildKeys;
    int[] probeKeys;
    RowType buildType;
    RowType probeType;
    int buildRowSize;
    long buildRowCount;
    long probeRowCount;
    boolean reverseJoin = !leftIsBuild;
    if (leftIsBuild) {
        buildTransform = leftInputTransform;
        buildProj = leftProj;
        buildType = leftType;
        buildRowSize = estimatedLeftAvgRowSize;
        buildRowCount = estimatedLeftRowCount;
        buildKeys = leftKeys;
        probeTransform = rightInputTransform;
        probeProj = rightProj;
        probeType = rightType;
        probeRowCount = estimatedRightRowCount;
        probeKeys = rightKeys;
    } else {
        buildTransform = rightInputTransform;
        buildProj = rightProj;
        buildType = rightType;
        buildRowSize = estimatedRightAvgRowSize;
        buildRowCount = estimatedRightRowCount;
        buildKeys = rightKeys;
        probeTransform = leftInputTransform;
        probeProj = leftProj;
        probeType = leftType;
        probeRowCount = estimatedLeftRowCount;
        probeKeys = leftKeys;
    }
    // operator
    StreamOperatorFactory<RowData> operator;
    FlinkJoinType joinType = joinSpec.getJoinType();
    HashJoinType hashJoinType = HashJoinType.of(leftIsBuild, joinType.isLeftOuter(), joinType.isRightOuter(), joinType == FlinkJoinType.SEMI, joinType == FlinkJoinType.ANTI);
    if (LongHashJoinGenerator.support(hashJoinType, keyType, joinSpec.getFilterNulls())) {
        operator =
                LongHashJoinGenerator.gen(
                        config.getTableConfig(),
                        hashJoinType,
                        keyType,
                        buildType,
                        probeType,
                        buildKeys,
                        probeKeys,
                        buildRowSize,
                        buildRowCount,
                        reverseJoin,
                        condFunc);
    } else {
        operator =
                SimpleOperatorFactory.of(
                        HashJoinOperator.newHashJoinOperator(
                                hashJoinType,
                                condFunc,
                                reverseJoin,
                                joinSpec.getFilterNulls(),
                                buildProj,
                                probeProj,
                                tryDistinctBuildRow,
                                buildRowSize,
                                buildRowCount,
                                probeRowCount,
                                keyType));
    }
    long managedMemory = config.get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_HASH_JOIN_MEMORY).getBytes();
    return ExecNodeUtil.createTwoInputTransformation(
            buildTransform,
            probeTransform,
            createTransformationName(config),
            createTransformationDescription(config),
            operator,
            InternalTypeInfo.of(getOutputType()),
            probeTransform.getParallelism(),
            managedMemory);
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) HashJoinType(org.apache.flink.table.runtime.operators.join.HashJoinType) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) FlinkJoinType(org.apache.flink.table.runtime.operators.join.FlinkJoinType) RowData(org.apache.flink.table.data.RowData) GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) GeneratedProjection(org.apache.flink.table.runtime.generated.GeneratedProjection)
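The hash join's managed memory is read from a single option. A small sketch, not taken from the Flink sources above, assuming the same tEnv as in the sketch after Example 1 and an illustrative size:

// read above via TABLE_EXEC_RESOURCE_HASH_JOIN_MEMORY
tEnv.getConfig().getConfiguration()
        .set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_HASH_JOIN_MEMORY, MemorySize.parse("256 mb"));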

Example 3 with GeneratedJoinCondition

use of org.apache.flink.table.runtime.generated.GeneratedJoinCondition in project flink by apache.

the class WindowJoinOperatorTest method createTestHarness.

private KeyedTwoInputStreamOperatorTestHarness<RowData, RowData, RowData, RowData> createTestHarness(FlinkJoinType joinType) throws Exception {
    String funcCode =
            "public class TestWindowJoinCondition extends org.apache.flink.api.common.functions.AbstractRichFunction "
                    + "implements org.apache.flink.table.runtime.generated.JoinCondition {\n"
                    + "\n"
                    + "    public TestWindowJoinCondition(Object[] reference) {\n"
                    + "    }\n"
                    + "\n"
                    + "    @Override\n"
                    + "    public boolean apply(org.apache.flink.table.data.RowData in1, org.apache.flink.table.data.RowData in2) {\n"
                    + "        return true;\n"
                    + "    }\n"
                    + "}\n";
    GeneratedJoinCondition joinFunction = new GeneratedJoinCondition("TestWindowJoinCondition", funcCode, new Object[0]);
    int keyIdx = 1;
    RowDataKeySelector keySelector = HandwrittenSelectorUtil.getRowDataSelector(new int[] { keyIdx }, INPUT_ROW_TYPE.toRowFieldTypes());
    TypeInformation<RowData> keyType = InternalTypeInfo.ofFields();
    WindowJoinOperator operator =
            WindowJoinOperatorBuilder.builder()
                    .leftSerializer(INPUT_ROW_TYPE.toRowSerializer())
                    .rightSerializer(INPUT_ROW_TYPE.toRowSerializer())
                    .generatedJoinCondition(joinFunction)
                    .leftWindowEndIndex(0)
                    .rightWindowEndIndex(0)
                    .filterNullKeys(new boolean[] { true })
                    .joinType(joinType)
                    .withShiftTimezone(shiftTimeZone)
                    .build();
    KeyedTwoInputStreamOperatorTestHarness<RowData, RowData, RowData, RowData> testHarness =
            new KeyedTwoInputStreamOperatorTestHarness<>(operator, keySelector, keySelector, keyType);
    return testHarness;
}
Also used : GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness)
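A GeneratedJoinCondition is only a holder for generated source code. A minimal sketch, not part of the test above, of compiling the handwritten condition and calling the resulting JoinCondition directly (assumes org.apache.flink.table.runtime.generated.JoinCondition and org.apache.flink.table.data.GenericRowData, and a test-scope class loader):

// newInstance(...) compiles funcCode and instantiates TestWindowJoinCondition
JoinCondition condition =
        joinFunction.newInstance(Thread.currentThread().getContextClassLoader());
// the handwritten apply(...) above always returns true, regardless of the rows
boolean matched = condition.apply(GenericRowData.of(1L), GenericRowData.of(2L));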

Example 4 with GeneratedJoinCondition

use of org.apache.flink.table.runtime.generated.GeneratedJoinCondition in project flink by apache.

the class StreamExecWindowJoin method translateToPlanInternal.

@Override
@SuppressWarnings("unchecked")
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    int leftWindowEndIndex = ((WindowAttachedWindowingStrategy) leftWindowing).getWindowEnd();
    int rightWindowEndIndex = ((WindowAttachedWindowingStrategy) rightWindowing).getWindowEnd();
    final ExecEdge leftInputEdge = getInputEdges().get(0);
    final ExecEdge rightInputEdge = getInputEdges().get(1);
    final Transformation<RowData> leftTransform = (Transformation<RowData>) leftInputEdge.translateToPlan(planner);
    final Transformation<RowData> rightTransform = (Transformation<RowData>) rightInputEdge.translateToPlan(planner);
    final RowType leftType = (RowType) leftInputEdge.getOutputType();
    final RowType rightType = (RowType) rightInputEdge.getOutputType();
    JoinUtil.validateJoinSpec(joinSpec, leftType, rightType, true);
    final int[] leftJoinKey = joinSpec.getLeftKeys();
    final int[] rightJoinKey = joinSpec.getRightKeys();
    final InternalTypeInfo<RowData> leftTypeInfo = InternalTypeInfo.of(leftType);
    final InternalTypeInfo<RowData> rightTypeInfo = InternalTypeInfo.of(rightType);
    GeneratedJoinCondition generatedCondition = JoinUtil.generateConditionFunction(config.getTableConfig(), joinSpec, leftType, rightType);
    ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(leftWindowing.getTimeAttributeType(), config.getLocalTimeZone());
    WindowJoinOperator operator =
            WindowJoinOperatorBuilder.builder()
                    .leftSerializer(leftTypeInfo.toRowSerializer())
                    .rightSerializer(rightTypeInfo.toRowSerializer())
                    .generatedJoinCondition(generatedCondition)
                    .leftWindowEndIndex(leftWindowEndIndex)
                    .rightWindowEndIndex(rightWindowEndIndex)
                    .filterNullKeys(joinSpec.getFilterNulls())
                    .joinType(joinSpec.getJoinType())
                    .withShiftTimezone(shiftTimeZone)
                    .build();
    final RowType returnType = (RowType) getOutputType();
    final TwoInputTransformation<RowData, RowData, RowData> transform =
            ExecNodeUtil.createTwoInputTransformation(
                    leftTransform,
                    rightTransform,
                    createTransformationMeta(WINDOW_JOIN_TRANSFORMATION, config),
                    operator,
                    InternalTypeInfo.of(returnType),
                    leftTransform.getParallelism());
    // set KeyType and Selector for state
    RowDataKeySelector leftSelect = KeySelectorUtil.getRowDataSelector(leftJoinKey, leftTypeInfo);
    RowDataKeySelector rightSelect = KeySelectorUtil.getRowDataSelector(rightJoinKey, rightTypeInfo);
    transform.setStateKeySelectors(leftSelect, rightSelect);
    transform.setStateKeyType(leftSelect.getProducedType());
    return transform;
}
Also used : TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) WindowAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) WindowJoinOperator(org.apache.flink.table.runtime.operators.join.window.WindowJoinOperator)
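The last step keys both inputs on the join key for state access. A minimal sketch, not taken from the plan node above, of what such a selector produces; it assumes the two-argument KeySelectorUtil.getRowDataSelector overload used above and illustrative field types (BigIntType, VarCharType, StringData, GenericRowData):

// a row type with a BIGINT join key at position 0 and a STRING payload
InternalTypeInfo<RowData> rowInfo =
        InternalTypeInfo.ofFields(new BigIntType(), new VarCharType(VarCharType.MAX_LENGTH));
RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(new int[] { 0 }, rowInfo);
// getKey(...) projects the key fields out of an input row (declared as throwing Exception)
RowData key = selector.getKey(GenericRowData.of(42L, StringData.fromString("payload")));
// selector.getProducedType() describes the single-field key row used as the state key type above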

Example 5 with GeneratedJoinCondition

use of org.apache.flink.table.runtime.generated.GeneratedJoinCondition in project flink by apache.

the class StreamExecIntervalJoin method translateToPlanInternal.

@Override
@SuppressWarnings("unchecked")
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    ExecEdge leftInputEdge = getInputEdges().get(0);
    ExecEdge rightInputEdge = getInputEdges().get(1);
    RowType leftRowType = (RowType) leftInputEdge.getOutputType();
    RowType rightRowType = (RowType) rightInputEdge.getOutputType();
    Transformation<RowData> leftInputTransform = (Transformation<RowData>) leftInputEdge.translateToPlan(planner);
    Transformation<RowData> rightInputTransform = (Transformation<RowData>) rightInputEdge.translateToPlan(planner);
    RowType returnType = (RowType) getOutputType();
    InternalTypeInfo<RowData> returnTypeInfo = InternalTypeInfo.of(returnType);
    JoinSpec joinSpec = intervalJoinSpec.getJoinSpec();
    IntervalJoinSpec.WindowBounds windowBounds = intervalJoinSpec.getWindowBounds();
    switch(joinSpec.getJoinType()) {
        case INNER:
        case LEFT:
        case RIGHT:
        case FULL:
            long relativeWindowSize = windowBounds.getLeftUpperBound() - windowBounds.getLeftLowerBound();
            if (relativeWindowSize < 0) {
                LOGGER.warn("The relative time interval size " + relativeWindowSize + "is negative, please check the join conditions.");
                return createNegativeWindowSizeJoin(joinSpec, leftInputTransform, rightInputTransform, leftRowType.getFieldCount(), rightRowType.getFieldCount(), returnTypeInfo, config);
            } else {
                GeneratedJoinCondition joinCondition = JoinUtil.generateConditionFunction(config.getTableConfig(), joinSpec, leftRowType, rightRowType);
                IntervalJoinFunction joinFunction = new IntervalJoinFunction(joinCondition, returnTypeInfo, joinSpec.getFilterNulls());
                TwoInputTransformation<RowData, RowData, RowData> transform;
                if (windowBounds.isEventTime()) {
                    transform = createRowTimeJoin(leftInputTransform, rightInputTransform, returnTypeInfo, joinFunction, joinSpec, windowBounds, config);
                } else {
                    transform = createProcTimeJoin(leftInputTransform, rightInputTransform, returnTypeInfo, joinFunction, joinSpec, windowBounds, config);
                }
                if (inputsContainSingleton()) {
                    transform.setParallelism(1);
                    transform.setMaxParallelism(1);
                }
                // set KeyType and Selector for state
                RowDataKeySelector leftSelect = KeySelectorUtil.getRowDataSelector(joinSpec.getLeftKeys(), InternalTypeInfo.of(leftRowType));
                RowDataKeySelector rightSelect = KeySelectorUtil.getRowDataSelector(joinSpec.getRightKeys(), InternalTypeInfo.of(rightRowType));
                transform.setStateKeySelectors(leftSelect, rightSelect);
                transform.setStateKeyType(leftSelect.getProducedType());
                return transform;
            }
        default:
            throw new TableException("Interval Join: " + joinSpec.getJoinType() + " Join between stream " + "and stream is not supported yet.\nplease re-check " + "interval join statement according to description above.");
    }
}
Also used : TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) UnionTransformation(org.apache.flink.streaming.api.transformations.UnionTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) TableException(org.apache.flink.table.api.TableException) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) JoinSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.JoinSpec) IntervalJoinSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.IntervalJoinSpec) RowType(org.apache.flink.table.types.logical.RowType) IntervalJoinFunction(org.apache.flink.table.runtime.operators.join.interval.IntervalJoinFunction) RowData(org.apache.flink.table.data.RowData) GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) IntervalJoinSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.IntervalJoinSpec)
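The negative-size branch above guards against interval bounds that can never match. A small arithmetic sketch with hypothetical bound values (the mapping from a concrete join predicate to these bounds is not shown in the source above):

// hypothetical bounds in milliseconds: the left row time may range from (rightTime - 10s) to (rightTime + 5s)
long leftLowerBound = -10_000L;
long leftUpperBound = 5_000L;
long relativeWindowSize = leftUpperBound - leftLowerBound; // 15_000 ms, non-negative: normal join path
// if the bounds were swapped (lower = 5_000, upper = -10_000), the size would be -15_000
// and translateToPlanInternal would fall into createNegativeWindowSizeJoin(...)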

Aggregations

GeneratedJoinCondition (org.apache.flink.table.runtime.generated.GeneratedJoinCondition) 8
RowData (org.apache.flink.table.data.RowData) 6
RowType (org.apache.flink.table.types.logical.RowType) 6
Transformation (org.apache.flink.api.dag.Transformation) 5
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) 5
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector) 4
TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation) 3
CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext) 3
FlinkJoinType (org.apache.flink.table.runtime.operators.join.FlinkJoinType) 2
LogicalType (org.apache.flink.table.types.logical.LogicalType) 2
ZoneId (java.time.ZoneId) 1
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 1
UnionTransformation (org.apache.flink.streaming.api.transformations.UnionTransformation) 1
KeyedTwoInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) 1
TableConfig (org.apache.flink.table.api.TableConfig) 1
TableException (org.apache.flink.table.api.TableException) 1
ExprCodeGenerator (org.apache.flink.table.planner.codegen.ExprCodeGenerator) 1
GeneratedExpression (org.apache.flink.table.planner.codegen.GeneratedExpression) 1
SortCodeGenerator (org.apache.flink.table.planner.codegen.sort.SortCodeGenerator) 1
WindowAttachedWindowingStrategy (org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) 1