Search in sources :

Example 51 with RelOptCluster

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster in project hive by apache.

The method onMatch of the class HiveProjectSortExchangeTransposeRule.

// ~ Methods ----------------------------------------------------------------
// implement RelOptRule
/**
 * Moves the matched {@link HiveProject} underneath the {@link HiveSortExchange} it currently sits on.
 *
 * <p>The collation and the distribution keys of the exchange are recomputed against the project
 * through {@code getNewRelFieldCollations} and {@code getNewRelDistributionKeys}. When the
 * collation helper returns {@code null} the rule does nothing.
 *
 * @param call rule call whose rel(0) is the project and rel(1) the sort exchange
 */
public void onMatch(RelOptRuleCall call) {
    final HiveProject topProject = call.rel(0);
    final HiveSortExchange exchange = call.rel(1);
    final RelOptCluster projectCluster = topProject.getCluster();

    final List<RelFieldCollation> remappedCollations =
        getNewRelFieldCollations(topProject, exchange.getCollation(), projectCluster);
    if (remappedCollations == null) {
        // No collation could be derived for the transposed plan: leave the plan untouched.
        return;
    }

    // Canonical collation trait built from the recomputed field collations.
    final RelCollation canonicalCollation =
        RelCollationTraitDef.INSTANCE.canonize(RelCollationImpl.of(remappedCollations));

    // Canonical distribution trait: same type as before, keys recomputed against the project.
    final RelDistribution oldDistribution = exchange.getDistribution();
    final List<Integer> remappedKeys = getNewRelDistributionKeys(topProject, oldDistribution);
    final RelDistribution canonicalDistribution = RelDistributionTraitDef.INSTANCE.canonize(
        new HiveRelDistribution(oldDistribution.getType(), remappedKeys));

    final RelTraitSet exchangeTraits = TraitsUtil.getDefaultTraitSet(exchange.getCluster())
        .replace(canonicalCollation)
        .replace(canonicalDistribution);

    // The project is re-created on top of the exchange's input; the exchange then goes on top of it.
    final RelNode exchangeInput = exchange.getInput();
    final RelNode pushedProject = topProject.copy(exchangeInput.getTraitSet(), ImmutableList.of(exchangeInput));
    final SortExchange liftedExchange =
        exchange.copy(exchangeTraits, pushedProject, canonicalDistribution, canonicalCollation);
    call.transformTo(liftedExchange);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RelTraitSet(org.apache.calcite.plan.RelTraitSet) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) SortExchange(org.apache.calcite.rel.core.SortExchange) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) HiveRelDistribution(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution) RelCollation(org.apache.calcite.rel.RelCollation) RelNode(org.apache.calcite.rel.RelNode) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) RelDistribution(org.apache.calcite.rel.RelDistribution) HiveRelDistribution(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution)

Example 52 with RelOptCluster

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster in project hive by apache.

The method onMatch of the class HiveProjectSortTransposeRule.

// ~ Methods ----------------------------------------------------------------
// implement RelOptRule
/**
 * Moves the matched {@link HiveProject} underneath the {@link HiveSortLimit} it currently sits on.
 *
 * <p>The sort collation is recomputed against the project through
 * {@code getNewRelFieldCollations}; when that helper returns {@code null} the rule does nothing.
 * The offset and fetch of the original sort are carried over unchanged.
 *
 * @param call rule call whose rel(0) is the project and rel(1) the sort/limit
 */
public void onMatch(RelOptRuleCall call) {
    final HiveProject topProject = call.rel(0);
    final HiveSortLimit sortLimit = call.rel(1);
    final RelOptCluster projectCluster = topProject.getCluster();

    final List<RelFieldCollation> remappedCollations =
        getNewRelFieldCollations(topProject, sortLimit.getCollation(), projectCluster);
    if (remappedCollations == null) {
        // No collation could be derived for the transposed plan: leave the plan untouched.
        return;
    }

    // Canonize the recomputed collation through a trait set of the Hive convention.
    final RelCollation canonicalCollation = sortLimit.getCluster()
        .traitSetOf(HiveRelNode.CONVENTION)
        .canonize(RelCollationImpl.of(remappedCollations));

    // The project is re-created on top of the sort's input; the sort then goes on top of it.
    final RelNode sortInput = sortLimit.getInput();
    final RelNode pushedProject = topProject.copy(sortInput.getTraitSet(), ImmutableList.of(sortInput));
    call.transformTo(
        sortLimit.copy(pushedProject.getTraitSet(), pushedProject, canonicalCollation, sortLimit.offset, sortLimit.fetch));
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RelCollation(org.apache.calcite.rel.RelCollation) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) HiveSortLimit(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit) RelTraitSet(org.apache.calcite.plan.RelTraitSet)

Example 53 with RelOptCluster

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster in project hive by apache.

The method trim of the class HiveCardinalityPreservingJoinOptimization.

/**
 * Attempts to rewrite the plan below {@code root} so that fields of selected source tables are trimmed
 * from the original plan and obtained again by joining those tables back on top of it.
 *
 * <p>{@code root} is returned unchanged when it does not have exactly one input, when the lineage of
 * the projected fields cannot be determined, when no table qualifies for joining back, or when
 * trimming leaves the row type of the root input unchanged.
 *
 * @param relBuilder builder used to create the joins and the final project; it is also stored in
 *          {@code REL_BUILDER} while this method runs and removed from it before returning
 * @param root root of the plan to rewrite
 * @return a copy of {@code root} placed on top of the rewritten input, or {@code root} itself when
 *         the rewrite does not apply
 */
@Override
public RelNode trim(RelBuilder relBuilder, RelNode root) {
    try {
        if (root.getInputs().size() != 1) {
            LOG.debug("Only plans where root has one input are supported. Root: {}", root);
            return root;
        }
        // Publish the builder through REL_BUILDER; it is read back in step 3.2 and cleared in the
        // finally block below.
        REL_BUILDER.set(relBuilder);
        RexBuilder rexBuilder = relBuilder.getRexBuilder();
        RelNode rootInput = root.getInput(0);
        // Build the list of RexInputRef from root input RowType
        List<RexInputRef> rootFieldList = new ArrayList<>(rootInput.getRowType().getFieldCount());
        // Field names of the root input, reused as the output names of the final Project (step 5)
        List<String> newColumnNames = new ArrayList<>();
        for (int i = 0; i < rootInput.getRowType().getFieldList().size(); ++i) {
            RelDataTypeField relDataTypeField = rootInput.getRowType().getFieldList().get(i);
            rootFieldList.add(rexBuilder.makeInputRef(relDataTypeField.getType(), i));
            newColumnNames.add(relDataTypeField.getName());
        }
        // Bit set to gather the refs that backtrack to constant values
        BitSet constants = new BitSet();
        List<JoinedBackFields> lineages = getExpressionLineageOf(rootFieldList, rootInput, constants);
        if (lineages == null) {
            LOG.debug("Some projected field lineage can not be determined");
            return root;
        }
        // 1. Collect candidate tables for join back and map RexNodes coming from those tables to their index in the
        // rootInput row type
        // Collect all used fields from original plan
        // The fields that backtrack to constants are always kept.
        ImmutableBitSet fieldsUsed = ImmutableBitSet.of(constants.stream().toArray());
        List<TableToJoinBack> tableToJoinBackList = new ArrayList<>(lineages.size());
        // Index in the rootInput row type -> expression to re-create in step 4
        Map<Integer, RexNode> rexNodesToShuttle = new HashMap<>(rootInput.getRowType().getFieldCount());
        for (JoinedBackFields joinedBackFields : lineages) {
            // First non-nullable key of the source table that is contained in the fields projected from it
            Optional<ImmutableBitSet> projectedKeys = joinedBackFields.relOptHiveTable.getNonNullableKeys().stream().filter(joinedBackFields.fieldsInSourceTable::contains).findFirst();
            // The table is a candidate only when such a key exists and the projected fields are not exactly that key
            if (projectedKeys.isPresent() && !projectedKeys.get().equals(joinedBackFields.fieldsInSourceTable)) {
                TableToJoinBack tableToJoinBack = new TableToJoinBack(projectedKeys.get(), joinedBackFields);
                tableToJoinBackList.add(tableToJoinBack);
                // Only the key fields of this table stay marked as used in the original plan
                fieldsUsed = fieldsUsed.union(joinedBackFields.getSource(projectedKeys.get()));
                for (TableInputRefHolder mapping : joinedBackFields.mapping) {
                    // Fields not marked as used so far are re-created in step 4 from the stored expression
                    if (!fieldsUsed.get(mapping.indexInOriginalRowType)) {
                        rexNodesToShuttle.put(mapping.indexInOriginalRowType, mapping.rexNode);
                    }
                }
            } else {
                // Not a candidate: every field coming from this table stays in the original plan
                fieldsUsed = fieldsUsed.union(joinedBackFields.fieldsInOriginalRowType);
            }
        }
        if (tableToJoinBackList.isEmpty()) {
            LOG.debug("None of the tables has keys projected, unable to join back");
            return root;
        }
        // 2. Trim out non-key fields of joined back tables
        Set<RelDataTypeField> extraFields = Collections.emptySet();
        TrimResult trimResult = dispatchTrimFields(rootInput, fieldsUsed, extraFields);
        RelNode newInput = trimResult.left;
        if (newInput.getRowType().equals(rootInput.getRowType())) {
            LOG.debug("Nothing was trimmed out.");
            return root;
        }
        // 3. Join back tables to the top of original plan
        Mapping newInputMapping = trimResult.right;
        // Table input ref -> index of that field in the row type of the plan after the join backs
        Map<RexTableInputRef, Integer> tableInputRefMapping = new HashMap<>();
        for (TableToJoinBack tableToJoinBack : tableToJoinBackList) {
            LOG.debug("Joining back table {}", tableToJoinBack.joinedBackFields.relOptHiveTable.getName());
            // 3.1. Create new TableScan of tables to join back
            RelOptHiveTable relOptTable = tableToJoinBack.joinedBackFields.relOptHiveTable;
            RelOptCluster cluster = relBuilder.getCluster();
            HiveTableScan tableScan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), relOptTable, relOptTable.getHiveTableMD().getTableName(), null, false, false);
            // 3.2. Create Project with the required fields from this table
            RelNode projectTableAccessRel = tableScan.project(tableToJoinBack.joinedBackFields.fieldsInSourceTable, new HashSet<>(0), REL_BUILDER.get());
            // 3.3. Create mapping between the Project and TableScan
            Mapping projectMapping = Mappings.create(MappingType.INVERSE_SURJECTION, tableScan.getRowType().getFieldCount(), tableToJoinBack.joinedBackFields.fieldsInSourceTable.cardinality());
            // The i-th projected source field (in iteration order of the bit set) lands at project position i
            int projectIndex = 0;
            for (int i : tableToJoinBack.joinedBackFields.fieldsInSourceTable) {
                projectMapping.set(i, projectIndex);
                ++projectIndex;
            }
            // Field count of the plan built so far; fields of the joined-back table are placed after them
            int offset = newInput.getRowType().getFieldCount();
            // 3.4. Map rexTableInputRef to the index where it can be found in the new Input row type
            for (TableInputRefHolder mapping : tableToJoinBack.joinedBackFields.mapping) {
                int indexInSourceTable = mapping.tableInputRef.getIndex();
                if (!tableToJoinBack.keys.get(indexInSourceTable)) {
                    // 3.5. if this is not a key field it is shifted by the left input field count
                    tableInputRefMapping.put(mapping.tableInputRef, offset + projectMapping.getTarget(indexInSourceTable));
                }
            }
            // 3.6. Create Join
            relBuilder.push(newInput);
            relBuilder.push(projectTableAccessRel);
            RexNode joinCondition = joinCondition(newInput, newInputMapping, tableToJoinBack, projectTableAccessRel, projectMapping, rexBuilder);
            // The join result becomes the left input of the next join back
            newInput = relBuilder.join(JoinRelType.INNER, joinCondition).build();
        }
        // 4. Collect rexNodes for Project
        TableInputRefMapper mapper = new TableInputRefMapper(tableInputRefMapping, rexBuilder, newInput);
        List<RexNode> rexNodeList = new ArrayList<>(rootInput.getRowType().getFieldCount());
        for (int i = 0; i < rootInput.getRowType().getFieldCount(); i++) {
            RexNode rexNode = rexNodesToShuttle.get(i);
            if (rexNode != null) {
                // Field was trimmed from the original plan: rewrite its expression with the mapper
                rexNodeList.add(mapper.apply(rexNode));
            } else {
                // Field is still produced by the trimmed plan: reference it at its new position
                int target = newInputMapping.getTarget(i);
                rexNodeList.add(rexBuilder.makeInputRef(newInput.getRowType().getFieldList().get(target).getType(), target));
            }
        }
        // 5. Create Project on top of all Join backs
        relBuilder.push(newInput);
        relBuilder.project(rexNodeList, newColumnNames);
        return root.copy(root.getTraitSet(), singletonList(relBuilder.build()));
    } finally {
        // Always clear the builder stored at the start of the method, on every exit path
        REL_BUILDER.remove();
    }
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Mapping(org.apache.calcite.util.mapping.Mapping) RexBuilder(org.apache.calcite.rex.RexBuilder) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) BitSet(java.util.BitSet) RexTableInputRef(org.apache.calcite.rex.RexTableInputRef) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) RexNode(org.apache.calcite.rex.RexNode)

Example 54 with RelOptCluster

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster in project hive by apache.

The method reduceStddev of the class HiveAggregateReduceFunctionsRule.

/**
 * Rewrites a standard-deviation / variance style aggregate call in terms of SUM and COUNT.
 *
 * <pre>
 *   stddev_pop(x)  ==&gt; power((sum(x * x) - sum(x) * sum(x) / count(x)) / count(x), .5)
 *   stddev_samp(x) ==&gt; power((sum(x * x) - sum(x) * sum(x) / count(x)) / nullif(count(x) - 1, 0), .5)
 * </pre>
 *
 * <p>The {@code power(..., .5)} step is applied only when {@code sqrt} is set. The SUM and COUNT
 * calls are registered through {@code rexBuilder.addAggCall}, and the argument expressions through
 * {@code lookupOrAdd} on {@code inputExprs}.
 *
 * @param oldAggRel      aggregate that contains {@code oldCall}
 * @param oldCall        the single-argument aggregate call being reduced
 * @param biased         {@code true} to divide by count(x); {@code false} to divide by count(x) - 1,
 *                       with NULL as the divisor when count(x) equals 1
 * @param sqrt           whether to raise the quotient to the power 0.5
 * @param newCalls       list of aggregate calls passed to {@code addAggCall}
 * @param aggCallMapping aggregate-call mapping passed to {@code addAggCall}
 * @param inputExprs     input expressions; looked up or extended via {@code lookupOrAdd}
 * @return the replacement expression, cast to the type of {@code oldCall}
 */
private RexNode reduceStddev(Aggregate oldAggRel, AggregateCall oldCall, boolean biased, boolean sqrt, List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping, List<RexNode> inputExprs) {
    final int groupCount = oldAggRel.getGroupCount();
    final RelOptCluster aggCluster = oldAggRel.getCluster();
    final RexBuilder rexBuilder = aggCluster.getRexBuilder();
    final RelDataTypeFactory typeFactory = aggCluster.getTypeFactory();

    assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
    final int inputOrdinal = oldCall.getArgList().get(0);
    final RelDataType inputFieldType = getFieldType(oldAggRel.getInput(), inputOrdinal);
    final RelDataType nullableCallType = typeFactory.createTypeWithNullability(oldCall.getType(), true);

    // x, converted to the nullable type of the original call
    final RexNode x = rexBuilder.ensureType(nullableCallType, inputExprs.get(inputOrdinal), false);
    final int xOrdinal = lookupOrAdd(inputExprs, x);
    final RelDataType sumOfXType = getSumReturnType(rexBuilder.getTypeFactory(), x.getType());

    // x * x
    final RexNode xSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, x, x);
    final int xSquaredOrdinal = lookupOrAdd(inputExprs, xSquared);
    final RelDataType sumOfSquaresType = getSumReturnType(rexBuilder.getTypeFactory(), xSquared.getType());

    // sum(x * x)
    final HiveSqlSumAggFunction sumOfSquaresFunction = new HiveSqlSumAggFunction(
        oldCall.isDistinct(),
        ReturnTypes.explicit(sumOfSquaresType),
        InferTypes.explicit(Collections.singletonList(xSquared.getType())),
        oldCall.getAggregation().getOperandTypeChecker());
    final AggregateCall sumOfSquaresCall = createAggregateCallWithBinding(
        typeFactory, sumOfSquaresFunction, xSquared.getType(), oldAggRel, oldCall, xSquaredOrdinal);
    final RexNode sumOfSquares = rexBuilder.addAggCall(
        sumOfSquaresCall, groupCount, oldAggRel.indicator, newCalls, aggCallMapping,
        ImmutableList.of(sumOfSquaresCall.getType()));

    // sum(x)
    final HiveSqlSumAggFunction sumFunction = new HiveSqlSumAggFunction(
        oldCall.isDistinct(),
        ReturnTypes.explicit(sumOfXType),
        InferTypes.explicit(Collections.singletonList(inputFieldType)),
        oldCall.getAggregation().getOperandTypeChecker());
    final AggregateCall sumCall = AggregateCall.create(
        sumFunction, oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(xOrdinal),
        oldCall.filterArg, oldAggRel.getGroupCount(), oldAggRel.getInput(), null, null);
    final RexNode sum = rexBuilder.addAggCall(
        sumCall, groupCount, oldAggRel.indicator, newCalls, aggCallMapping,
        ImmutableList.of(sumCall.getType()));

    // sum(x) * sum(x), with sum(x) first brought to the nullable type of the original call
    final RexNode sumAsCallType = rexBuilder.ensureType(nullableCallType, sum, true);
    final RexNode squareOfSum = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, sumAsCallType, sumAsCallType);

    // count(x), typed as nullable BIGINT
    final RelDataType countType =
        typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
    final HiveSqlCountAggFunction countFunction = new HiveSqlCountAggFunction(
        oldCall.isDistinct(),
        ReturnTypes.explicit(countType),
        oldCall.getAggregation().getOperandTypeInference(),
        oldCall.getAggregation().getOperandTypeChecker());
    final AggregateCall countCall = AggregateCall.create(
        countFunction, oldCall.isDistinct(), oldCall.isApproximate(), oldCall.getArgList(),
        oldCall.filterArg, oldAggRel.getGroupCount(), oldAggRel.getInput(), countType, null);
    final RexNode count = rexBuilder.addAggCall(
        countCall, groupCount, oldAggRel.indicator, newCalls, aggCallMapping,
        ImmutableList.of(inputFieldType));

    // sum(x * x) - sum(x) * sum(x) / count(x)
    final RexNode squareOfSumOverCount = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, squareOfSum, count);
    final RexNode numerator = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, sumOfSquares, squareOfSumOverCount);

    final RexNode divisor;
    if (!biased) {
        // CASE WHEN count(x) = 1 THEN NULL ELSE count(x) - 1 END
        final RexLiteral literalOne = rexBuilder.makeExactLiteral(BigDecimal.ONE);
        final RexNode typedNull = rexBuilder.makeCast(count.getType(), rexBuilder.constantNull());
        final RexNode countLessOne = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, count, literalOne);
        final RexNode countIsOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, count, literalOne);
        divisor = rexBuilder.makeCall(SqlStdOperatorTable.CASE, countIsOne, typedNull, countLessOne);
    } else {
        divisor = count;
    }

    final RexNode quotient = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numerator, divisor);
    final RexNode reduced = sqrt
        ? rexBuilder.makeCall(SqlStdOperatorTable.POWER, quotient, rexBuilder.makeExactLiteral(new BigDecimal("0.5")))
        : quotient;
    return rexBuilder.makeCast(oldCall.getType(), reduced);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RexLiteral(org.apache.calcite.rex.RexLiteral) HiveSqlCountAggFunction(org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) RexBuilder(org.apache.calcite.rex.RexBuilder) RelDataType(org.apache.calcite.rel.type.RelDataType) HiveSqlSumAggFunction(org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction) BigDecimal(java.math.BigDecimal) RexNode(org.apache.calcite.rex.RexNode)

Example 55 with RelOptCluster

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster in project hive by apache.

The method create of the class HiveProject.

/**
 * Creates a HiveProject with no sort keys, i.e. with an empty collation list.
 *
 * <p>The row type of the project is derived from {@code exps} and {@code fieldNames} using the type
 * factory of the cluster that {@code child} belongs to.
 *
 * @param child
 *          input relational expression
 * @param exps
 *          set of expressions for the input columns
 * @param fieldNames
 *          aliases of the expressions; may be {@code null}, in which case no uniqueness check is made
 * @return the new project on top of {@code child}
 * @throws CalciteSemanticException
 *           if {@code fieldNames} is non-null and contains the same name more than once
 */
public static HiveProject create(RelNode child, List<? extends RexNode> exps, List<String> fieldNames) throws CalciteSemanticException {
    final RelOptCluster inputCluster = child.getCluster();

    // Column names must be unique (CALCITE-411).
    final boolean namesClash = fieldNames != null && !Util.isDistinct(fieldNames);
    if (namesClash) {
        throw new CalciteSemanticException(
            "Select list contains multiple expressions with the same name." + fieldNames,
            UnsupportedFeature.Same_name_in_multiple_expressions);
    }

    final RelDataType projectedRowType = RexUtil.createStructType(
        inputCluster.getTypeFactory(), exps, fieldNames, SqlValidatorUtil.EXPR_SUGGESTER);
    return create(inputCluster, child, exps, projectedRowType, Collections.<RelCollation>emptyList());
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RelDataType(org.apache.calcite.rel.type.RelDataType) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Aggregations

RelOptCluster (org.apache.calcite.plan.RelOptCluster)117 RelNode (org.apache.calcite.rel.RelNode)63 RelTraitSet (org.apache.calcite.plan.RelTraitSet)36 RexBuilder (org.apache.calcite.rex.RexBuilder)35 RexNode (org.apache.calcite.rex.RexNode)31 ArrayList (java.util.ArrayList)26 RelDataType (org.apache.calcite.rel.type.RelDataType)23 Test (org.junit.Test)21 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)15 List (java.util.List)13 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)13 RelBuilder (org.apache.calcite.tools.RelBuilder)13 RelCollation (org.apache.calcite.rel.RelCollation)12 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)11 RelOptTable (org.apache.calcite.plan.RelOptTable)10 ImmutableList (com.google.common.collect.ImmutableList)9 HashMap (java.util.HashMap)9 RelOptPlanner (org.apache.calcite.plan.RelOptPlanner)9 Join (org.apache.calcite.rel.core.Join)9 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)9