Example 91 with ImmutableBitSet

Use of org.apache.calcite.util.ImmutableBitSet in project hive by apache.

The trim method of the class HiveCardinalityPreservingJoinOptimization.

@Override
public RelNode trim(RelBuilder relBuilder, RelNode root) {
    try {
        if (root.getInputs().size() != 1) {
            LOG.debug("Only plans where root has one input are supported. Root: {}", root);
            return root;
        }
        REL_BUILDER.set(relBuilder);
        RexBuilder rexBuilder = relBuilder.getRexBuilder();
        RelNode rootInput = root.getInput(0);
        // Build the list of RexInputRef from root input RowType
        List<RexInputRef> rootFieldList = new ArrayList<>(rootInput.getRowType().getFieldCount());
        List<String> newColumnNames = new ArrayList<>();
        for (int i = 0; i < rootInput.getRowType().getFieldList().size(); ++i) {
            RelDataTypeField relDataTypeField = rootInput.getRowType().getFieldList().get(i);
            rootFieldList.add(rexBuilder.makeInputRef(relDataTypeField.getType(), i));
            newColumnNames.add(relDataTypeField.getName());
        }
        // Bit set to gather the refs that backtrack to constant values
        BitSet constants = new BitSet();
        List<JoinedBackFields> lineages = getExpressionLineageOf(rootFieldList, rootInput, constants);
        if (lineages == null) {
            LOG.debug("Some projected field lineage can not be determined");
            return root;
        }
        // 1. Collect candidate tables for join back and map RexNodes coming from those tables to their index in the
        // rootInput row type
        // Collect all used fields from original plan
        ImmutableBitSet fieldsUsed = ImmutableBitSet.of(constants.stream().toArray());
        List<TableToJoinBack> tableToJoinBackList = new ArrayList<>(lineages.size());
        Map<Integer, RexNode> rexNodesToShuttle = new HashMap<>(rootInput.getRowType().getFieldCount());
        for (JoinedBackFields joinedBackFields : lineages) {
            Optional<ImmutableBitSet> projectedKeys = joinedBackFields.relOptHiveTable.getNonNullableKeys().stream().filter(joinedBackFields.fieldsInSourceTable::contains).findFirst();
            if (projectedKeys.isPresent() && !projectedKeys.get().equals(joinedBackFields.fieldsInSourceTable)) {
                TableToJoinBack tableToJoinBack = new TableToJoinBack(projectedKeys.get(), joinedBackFields);
                tableToJoinBackList.add(tableToJoinBack);
                fieldsUsed = fieldsUsed.union(joinedBackFields.getSource(projectedKeys.get()));
                for (TableInputRefHolder mapping : joinedBackFields.mapping) {
                    if (!fieldsUsed.get(mapping.indexInOriginalRowType)) {
                        rexNodesToShuttle.put(mapping.indexInOriginalRowType, mapping.rexNode);
                    }
                }
            } else {
                fieldsUsed = fieldsUsed.union(joinedBackFields.fieldsInOriginalRowType);
            }
        }
        if (tableToJoinBackList.isEmpty()) {
            LOG.debug("None of the tables has keys projected, unable to join back");
            return root;
        }
        // 2. Trim out non-key fields of joined back tables
        Set<RelDataTypeField> extraFields = Collections.emptySet();
        TrimResult trimResult = dispatchTrimFields(rootInput, fieldsUsed, extraFields);
        RelNode newInput = trimResult.left;
        if (newInput.getRowType().equals(rootInput.getRowType())) {
            LOG.debug("Nothing was trimmed out.");
            return root;
        }
        // 3. Join back tables to the top of original plan
        Mapping newInputMapping = trimResult.right;
        Map<RexTableInputRef, Integer> tableInputRefMapping = new HashMap<>();
        for (TableToJoinBack tableToJoinBack : tableToJoinBackList) {
            LOG.debug("Joining back table {}", tableToJoinBack.joinedBackFields.relOptHiveTable.getName());
            // 3.1. Create new TableScan of tables to join back
            RelOptHiveTable relOptTable = tableToJoinBack.joinedBackFields.relOptHiveTable;
            RelOptCluster cluster = relBuilder.getCluster();
            HiveTableScan tableScan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), relOptTable, relOptTable.getHiveTableMD().getTableName(), null, false, false);
            // 3.2. Create Project with the required fields from this table
            RelNode projectTableAccessRel = tableScan.project(tableToJoinBack.joinedBackFields.fieldsInSourceTable, new HashSet<>(0), REL_BUILDER.get());
            // 3.3. Create mapping between the Project and TableScan
            Mapping projectMapping = Mappings.create(MappingType.INVERSE_SURJECTION, tableScan.getRowType().getFieldCount(), tableToJoinBack.joinedBackFields.fieldsInSourceTable.cardinality());
            int projectIndex = 0;
            for (int i : tableToJoinBack.joinedBackFields.fieldsInSourceTable) {
                projectMapping.set(i, projectIndex);
                ++projectIndex;
            }
            int offset = newInput.getRowType().getFieldCount();
            // 3.4. Map rexTableInputRef to the index where it can be found in the new Input row type
            for (TableInputRefHolder mapping : tableToJoinBack.joinedBackFields.mapping) {
                int indexInSourceTable = mapping.tableInputRef.getIndex();
                if (!tableToJoinBack.keys.get(indexInSourceTable)) {
                // 3.5. If this is not a key field, it is shifted by the left input field count
                    tableInputRefMapping.put(mapping.tableInputRef, offset + projectMapping.getTarget(indexInSourceTable));
                }
            }
        // 3.6. Create Join
            relBuilder.push(newInput);
            relBuilder.push(projectTableAccessRel);
            RexNode joinCondition = joinCondition(newInput, newInputMapping, tableToJoinBack, projectTableAccessRel, projectMapping, rexBuilder);
            newInput = relBuilder.join(JoinRelType.INNER, joinCondition).build();
        }
        // 4. Collect rexNodes for Project
        TableInputRefMapper mapper = new TableInputRefMapper(tableInputRefMapping, rexBuilder, newInput);
        List<RexNode> rexNodeList = new ArrayList<>(rootInput.getRowType().getFieldCount());
        for (int i = 0; i < rootInput.getRowType().getFieldCount(); i++) {
            RexNode rexNode = rexNodesToShuttle.get(i);
            if (rexNode != null) {
                rexNodeList.add(mapper.apply(rexNode));
            } else {
                int target = newInputMapping.getTarget(i);
                rexNodeList.add(rexBuilder.makeInputRef(newInput.getRowType().getFieldList().get(target).getType(), target));
            }
        }
        // 5. Create Project on top of all Join backs
        relBuilder.push(newInput);
        relBuilder.project(rexNodeList, newColumnNames);
        return root.copy(root.getTraitSet(), singletonList(relBuilder.build()));
    } finally {
        REL_BUILDER.remove();
    }
}
Also used : RelOptCluster (org.apache.calcite.plan.RelOptCluster), ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Mapping (org.apache.calcite.util.mapping.Mapping), RexBuilder (org.apache.calcite.rex.RexBuilder), BitSet (java.util.BitSet), RexTableInputRef (org.apache.calcite.rex.RexTableInputRef), RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField), RelOptHiveTable (org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable), HiveRelNode (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode), RelNode (org.apache.calcite.rel.RelNode), RexInputRef (org.apache.calcite.rex.RexInputRef), HiveTableScan (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan), RexNode (org.apache.calcite.rex.RexNode)
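
The trim logic above leans on a handful of ImmutableBitSet operations: lifting a mutable java.util.BitSet into an immutable set (step 1), growing the result with union, and membership tests with get. Below is a minimal standalone sketch of just those calls; the field positions and the class name are invented for illustration, nothing here comes from the Hive source.

import java.util.BitSet;
import org.apache.calcite.util.ImmutableBitSet;

public class UsedFieldsSketch {
    public static void main(String[] args) {
        // Pretend field 2 was backtracked to a constant, as in step 1 above
        BitSet constants = new BitSet();
        constants.set(2);
        // Same conversion trim() uses: IntStream -> int[] -> ImmutableBitSet
        ImmutableBitSet fieldsUsed = ImmutableBitSet.of(constants.stream().toArray());
        // Hypothetical key columns of a table we want to join back
        ImmutableBitSet projectedKeys = ImmutableBitSet.of(0, 1);
        // union() never mutates; it returns a new set, hence the reassignment
        fieldsUsed = fieldsUsed.union(projectedKeys);
        System.out.println(fieldsUsed);        // {0, 1, 2}
        System.out.println(fieldsUsed.get(5)); // false: bit 5 was never set
    }
}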

Example 92 with ImmutableBitSet

Use of org.apache.calcite.util.ImmutableBitSet in project hive by apache.

The classifyFilters method of the class HiveFilterJoinRule.

/**
 * Classifies filters according to where they should be processed. They
 * either stay where they are, are pushed to the join (if they originated
 * from above the join), or are pushed to one of the children. Filters that
 * are pushed are added to list passed in as input parameters.
 *
 * @param joinRel      join node
 * @param filters      filters to be classified
 * @param pushInto     whether filters can be pushed into the join
 * @param pushLeft     true if filters can be pushed to the left
 * @param pushRight    true if filters can be pushed to the right
 * @param joinFilters  list of filters to push to the join
 * @param leftFilters  list of filters to push to the left child
 * @param rightFilters list of filters to push to the right child
 * @return whether at least one filter was pushed
 */
private static boolean classifyFilters(RelNode joinRel, List<RexNode> filters, boolean pushInto, boolean pushLeft, boolean pushRight, List<RexNode> joinFilters, List<RexNode> leftFilters, List<RexNode> rightFilters) {
    if (Bug.CALCITE_4499_FIXED) {
        throw new AssertionError("Remove this method when [CALCITE-4499] " + "has been fixed and use directly Calcite's RelOptUtil.classifyFilters.");
    }
    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
    final int nSysFields = 0; // joinRel.getSystemFieldList().size();
    final List<RelDataTypeField> leftFields = joinRel.getInputs().get(0).getRowType().getFieldList();
    final int nFieldsLeft = leftFields.size();
    final List<RelDataTypeField> rightFields = joinRel.getInputs().get(1).getRowType().getFieldList();
    final int nFieldsRight = rightFields.size();
    final int nTotalFields = nFieldsLeft + nFieldsRight;
    // set the reference bitmaps for the left and right children
    ImmutableBitSet leftBitmap = ImmutableBitSet.range(nSysFields, nSysFields + nFieldsLeft);
    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nSysFields + nFieldsLeft, nTotalFields);
    final List<RexNode> filtersToRemove = new ArrayList<>();
    for (RexNode filter : filters) {
        final InputFinder inputFinder = InputFinder.analyze(filter);
        final ImmutableBitSet inputBits = inputFinder.build();
        // filters can be pushed to the left child if the left child
        // does not generate NULLs and the only columns referenced in
        // the filter originate from the left child
        if (pushLeft && leftBitmap.contains(inputBits)) {
            // ignore filters that always evaluate to true
            if (!filter.isAlwaysTrue()) {
                // adjust the field references in the filter to reflect
                // that fields in the left now shift over by the number
                // of system fields
                final RexNode shiftedFilter = shiftFilter(nSysFields, nSysFields + nFieldsLeft, -nSysFields, rexBuilder, joinFields, nTotalFields, leftFields, filter);
                leftFilters.add(shiftedFilter);
            }
            filtersToRemove.add(filter);
        // filters can be pushed to the right child if the right child
        // does not generate NULLs and the only columns referenced in
        // the filter originate from the right child
        } else if (pushRight && rightBitmap.contains(inputBits)) {
            if (!filter.isAlwaysTrue()) {
                // adjust the field references in the filter to reflect
                // that fields in the right now shift over to the left;
                // since we never push filters to a NULL generating
                // child, the types of the source should match the dest
                // so we don't need to explicitly pass the destination
                // fields to RexInputConverter
                final RexNode shiftedFilter = shiftFilter(nSysFields + nFieldsLeft, nTotalFields, -(nSysFields + nFieldsLeft), rexBuilder, joinFields, nTotalFields, rightFields, filter);
                rightFilters.add(shiftedFilter);
            }
            filtersToRemove.add(filter);
        } else {
            // If the filter can't be pushed to either child, we may push it into the join
            if (pushInto) {
                if (!joinFilters.contains(filter)) {
                    joinFilters.add(filter);
                }
                filtersToRemove.add(filter);
            }
        }
    }
    // Remove filters after the loop, to prevent concurrent modification.
    if (!filtersToRemove.isEmpty()) {
        filters.removeAll(filtersToRemove);
    }
    // Did anything change?
    return !filtersToRemove.isEmpty();
}
Also used : InputFinder (org.apache.calcite.plan.RelOptUtil.InputFinder), RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField), ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), ArrayList (java.util.ArrayList), RexBuilder (org.apache.calcite.rex.RexBuilder), RexNode (org.apache.calcite.rex.RexNode)
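
The classification above reduces to two ImmutableBitSet primitives: range() builds the bitmaps covering each child's field positions, and contains() tests whether every input of a filter falls inside one of them. Here is a small sketch under assumed field counts (3 on the left, 2 on the right); the variable names mirror the method, but the values are illustrative.

import org.apache.calcite.util.ImmutableBitSet;

public class FilterSideSketch {
    public static void main(String[] args) {
        int nFieldsLeft = 3;
        int nFieldsRight = 2;
        // range(from, to) is inclusive-exclusive, like BitSet.set(from, to)
        ImmutableBitSet leftBitmap = ImmutableBitSet.range(0, nFieldsLeft);   // {0, 1, 2}
        ImmutableBitSet rightBitmap =
            ImmutableBitSet.range(nFieldsLeft, nFieldsLeft + nFieldsRight);  // {3, 4}
        // Suppose a filter references fields 0 and 2 (what InputFinder would report)
        ImmutableBitSet inputBits = ImmutableBitSet.of(0, 2);
        System.out.println(leftBitmap.contains(inputBits));  // true: push to the left child
        System.out.println(rightBitmap.contains(inputBits)); // false
    }
}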

Example 93 with ImmutableBitSet

Use of org.apache.calcite.util.ImmutableBitSet in project hive by apache.

The createGroupingSets method of the class HiveExpandDistinctAggregatesRule.

/**
 * @param aggregate the original aggregate
 * @param argList the original argList in aggregate
 * @param cleanArgList the new argList without duplicates
 * @param map the mapping from the original argList to the new argList
 * @param groupSet the new group set
 * @return an Aggregate over the new grouping sets, with a grouping-ID aggregate call appended
 */
private Aggregate createGroupingSets(Aggregate aggregate, List<List<Integer>> argList, List<List<Integer>> cleanArgList, Map<Integer, Integer> map, ImmutableBitSet groupSet) {
    final List<ImmutableBitSet> origGroupSets = new ArrayList<>();
    for (int i = 0; i < argList.size(); i++) {
        List<Integer> list = argList.get(i);
        ImmutableBitSet bitSet = aggregate.getGroupSet().union(ImmutableBitSet.of(list));
        int prev = origGroupSets.indexOf(bitSet);
        if (prev == -1) {
            origGroupSets.add(bitSet);
            cleanArgList.add(list);
        } else {
            map.put(i, prev);
        }
    }
    // Calcite expects the grouping sets sorted and without duplicates
    origGroupSets.sort(ImmutableBitSet.COMPARATOR);
    List<AggregateCall> aggregateCalls = new ArrayList<AggregateCall>();
    // Create GroupingID column
    AggregateCall aggCall = AggregateCall.create(HiveGroupingID.INSTANCE, false, new ImmutableList.Builder<Integer>().build(), -1, this.cluster.getTypeFactory().createSqlType(SqlTypeName.BIGINT), HiveGroupingID.INSTANCE.getName());
    aggregateCalls.add(aggCall);
    return new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregate.getInput(), groupSet, origGroupSets, aggregateCalls);
}
Also used : AggregateCall (org.apache.calcite.rel.core.AggregateCall), HiveAggregate (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate), ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), RexBuilder (org.apache.calcite.rex.RexBuilder), ArrayList (java.util.ArrayList)
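
Two ImmutableBitSet details carry this method: of(list) accepts any Iterable<Integer>, so each argList entry converts directly, and the static COMPARATOR gives the canonical ordering Calcite expects for grouping sets. A compact sketch of the deduplicate-then-sort pattern follows, with invented argument lists standing in for the aggregate's real arguments.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.calcite.util.ImmutableBitSet;

public class GroupingSetsSketch {
    public static void main(String[] args) {
        ImmutableBitSet groupSet = ImmutableBitSet.of(0); // hypothetical GROUP BY column
        List<List<Integer>> argList = Arrays.asList(
            Arrays.asList(2), Arrays.asList(1), Arrays.asList(2)); // note the duplicate
        List<ImmutableBitSet> origGroupSets = new ArrayList<>();
        for (List<Integer> list : argList) {
            // of(Iterable<Integer>) builds the set straight from the arg list
            ImmutableBitSet bitSet = groupSet.union(ImmutableBitSet.of(list));
            if (origGroupSets.indexOf(bitSet) == -1) { // same duplicate check as above
                origGroupSets.add(bitSet);
            }
        }
        // Calcite expects grouping sets sorted and duplicate-free
        origGroupSets.sort(ImmutableBitSet.COMPARATOR);
        // Prints the two distinct sets {0, 1} and {0, 2} in canonical order
        System.out.println(origGroupSets);
    }
}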

Example 94 with ImmutableBitSet

Use of org.apache.calcite.util.ImmutableBitSet in project hive by apache.

The visit method of the class HiveSortLimitVisitor.

@Override
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(sortRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
        if (sortRel.getCollation() == RelCollations.EMPTY) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
        } else if (sortRel.fetch == null) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
        } else {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
        }
    }
    Operator<?> inputOp = inputOpAf.inputs.get(0);
    Operator<?> resultOp = inputOpAf.inputs.get(0);
    // 1. If we need to sort tuples based on the value of some of their columns
    if (sortRel.getCollation() != RelCollations.EMPTY) {
        // In strict mode, in the presence of order by, limit must be specified.
        if (sortRel.fetch == null) {
            String error = StrictChecks.checkNoLimit(hiveOpConverter.getHiveConf());
            if (error != null) {
                throw new SemanticException(error);
            }
        }
        // 1.a. Extract order for each column from collation
        // Generate sortCols and order
        ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
        ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
        Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
        List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
            int sortColumnPos = sortInfo.getFieldIndex();
            ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
            ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
            sortCols.add(sortColumn);
            if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
                order.append("-");
            } else {
                order.append("+");
            }
            if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
                nullOrder.append("a");
            } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
                nullOrder.append("z");
            } else {
                // Default
                nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
            }
            if (obRefToCallMap != null) {
                RexNode obExpr = obRefToCallMap.get(sortColumnPos);
                sortColsPosBuilder.set(sortColumnPos);
                if (obExpr == null) {
                    sortOutputColsPosBuilder.set(sortColumnPos);
                }
            }
        }
        // Use only 1 reducer for order by
        int numReducers = 1;
        // We keep only the columns that are part of the final output
        List<String> keepColumns = new ArrayList<String>();
        final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
        final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
        final List<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
        for (int pos = 0; pos < inputSchema.size(); pos++) {
            if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos)) || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
                keepColumns.add(inputSchema.get(pos).getInternalName());
            }
        }
        // 1.b. Generate reduce sink and project operator
        resultOp = HiveOpConverterUtils.genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveOpConverter.getHiveConf(), keepColumns);
    }
    // 2. If we need to generate limit
    if (sortRel.fetch != null) {
        int limit = RexLiteral.intValue(sortRel.fetch);
        int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
        LimitDesc limitDesc = new LimitDesc(offset, limit);
        ArrayList<ColumnInfo> cinfoLst = HiveOpConverterUtils.createColInfos(resultOp);
        resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
        }
    }
    // 3. Return result
    return inputOpAf.clone(resultOp);
}
Also used : RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr), LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc), RelFieldCollation (org.apache.calcite.rel.RelFieldCollation), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), RexNode (org.apache.calcite.rex.RexNode)
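
Here the ImmutableBitSet is assembled incrementally through its Builder, since the sort positions only become known while walking the field collations. A standalone sketch of that pattern with invented column positions; once build() is called, the resulting set is immutable and drives membership tests like the keepColumns decision above.

import org.apache.calcite.util.ImmutableBitSet;

public class SortColsSketch {
    public static void main(String[] args) {
        ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
        // Pretend the collation named columns 3 and 1, in that order
        sortColsPosBuilder.set(3);
        sortColsPosBuilder.set(1);
        ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
        for (int pos = 0; pos < 5; pos++) {
            System.out.println("column " + pos + " is a sort key: " + sortColsPos.get(pos));
        }
    }
}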

Example 95 with ImmutableBitSet

Use of org.apache.calcite.util.ImmutableBitSet in project hive by apache.

The getNotNullConstraintExpr method of the class ConstraintExprGenerator.

private T getNotNullConstraintExpr(Table targetTable, RowResolver inputRR, boolean isUpdateStatement) throws SemanticException {
    boolean forceNotNullConstraint = conf.getBoolVar(HiveConf.ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT);
    if (!forceNotNullConstraint) {
        return null;
    }
    ImmutableBitSet nullConstraintBitSet;
    try {
        nullConstraintBitSet = getEnabledNotNullConstraints(targetTable);
    } catch (SemanticException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    if (nullConstraintBitSet == null) {
        return null;
    }
    T currUDF = null;
    int constraintIdx = 0;
    List<ColumnInfo> inputColInfos = inputRR.getColumnInfos();
    for (int colExprIdx = 0; colExprIdx < inputColInfos.size(); colExprIdx++) {
        if (isUpdateStatement && colExprIdx == 0) {
            // for updates, the first column is _rowid
            continue;
        }
        if (nullConstraintBitSet.indexOf(constraintIdx) != -1) {
            T currExpr = typeCheckProcFactory.exprFactory.createColumnRefExpr(inputColInfos.get(colExprIdx), inputRR, 0);
            T isNotNullUDF = exprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
            if (currUDF != null) {
                currUDF = exprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
            } else {
                currUDF = isNotNullUDF;
            }
        }
        constraintIdx++;
    }
    return currUDF;
}
Also used : ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)
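
The membership test here uses indexOf rather than get: indexOf(b) returns a non-negative position when bit b is set and -1 otherwise, so comparing against -1 is equivalent to get(b). A short sketch with a hypothetical NOT NULL column set; the column positions are made up.

import org.apache.calcite.util.ImmutableBitSet;

public class NotNullSketch {
    public static void main(String[] args) {
        // Hypothetical: columns 0 and 2 carry enabled NOT NULL constraints
        ImmutableBitSet nullConstraintBitSet = ImmutableBitSet.of(0, 2);
        for (int constraintIdx = 0; constraintIdx < 4; constraintIdx++) {
            boolean notNull = nullConstraintBitSet.indexOf(constraintIdx) != -1;
            System.out.println("column " + constraintIdx + " needs isnotnull check: " + notNull);
        }
    }
}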

Aggregations

ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet): 208
RexNode (org.apache.calcite.rex.RexNode): 127
RelNode (org.apache.calcite.rel.RelNode): 110
ArrayList (java.util.ArrayList): 101
RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField): 66
RexBuilder (org.apache.calcite.rex.RexBuilder): 60
AggregateCall (org.apache.calcite.rel.core.AggregateCall): 55
RexInputRef (org.apache.calcite.rex.RexInputRef): 45
RelDataType (org.apache.calcite.rel.type.RelDataType): 39
HashMap (java.util.HashMap): 36
RelBuilder (org.apache.calcite.tools.RelBuilder): 36
RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery): 30
Mapping (org.apache.calcite.util.mapping.Mapping): 30
Pair (org.apache.calcite.util.Pair): 29
Aggregate (org.apache.calcite.rel.core.Aggregate): 27
ImmutableList (com.google.common.collect.ImmutableList): 23
LinkedHashSet (java.util.LinkedHashSet): 23
List (java.util.List): 22
HashSet (java.util.HashSet): 20
RelOptUtil (org.apache.calcite.plan.RelOptUtil): 18