use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveCardinalityPreservingJoinOptimization method trim.
/**
 * Tries to rewrite the plan below {@code root} so that, for selected source tables, only a key
 * reported by {@code getNonNullableKeys()} is kept in the original plan while the remaining
 * projected columns of those tables are fetched by joining a fresh scan of each table back on top
 * of the trimmed plan.
 *
 * <p>{@code root} is returned unchanged when it does not have exactly one input, when the lineage
 * of the projected fields cannot be determined, when no table qualifies for joining back, or when
 * trimming leaves the row type of the input unchanged.
 *
 * <p>Once the single-input check has passed, the given builder is stored in {@code REL_BUILDER}
 * for the duration of the rewrite; the {@code finally} block always removes it again.
 *
 * @param relBuilder builder used to create the joins and the final project
 * @param root plan root; only its single input is rewritten
 * @return {@code root} itself, or a copy of it whose only input is the newly built project
 */
@Override
public RelNode trim(RelBuilder relBuilder, RelNode root) {
try {
if (root.getInputs().size() != 1) {
LOG.debug("Only plans where root has one input are supported. Root: {}", root);
return root;
}
// Make the builder available through REL_BUILDER while this rewrite is running.
REL_BUILDER.set(relBuilder);
RexBuilder rexBuilder = relBuilder.getRexBuilder();
RelNode rootInput = root.getInput(0);
// Build the list of RexInputRef from root input RowType, remembering the field names so the
// final project can expose the same column names.
List<RexInputRef> rootFieldList = new ArrayList<>(rootInput.getRowType().getFieldCount());
List<String> newColumnNames = new ArrayList<>();
for (int i = 0; i < rootInput.getRowType().getFieldList().size(); ++i) {
RelDataTypeField relDataTypeField = rootInput.getRowType().getFieldList().get(i);
rootFieldList.add(rexBuilder.makeInputRef(relDataTypeField.getType(), i));
newColumnNames.add(relDataTypeField.getName());
}
// Bit set to gather the refs that backtrack to constant values
BitSet constants = new BitSet();
List<JoinedBackFields> lineages = getExpressionLineageOf(rootFieldList, rootInput, constants);
if (lineages == null) {
LOG.debug("Some projected field lineage can not be determined");
return root;
}
// 1. Collect candidate tables for join back and map RexNodes coming from those tables to their index in the
// rootInput row type
// Collect all used fields from original plan; the fields flagged in 'constants' are always kept.
ImmutableBitSet fieldsUsed = ImmutableBitSet.of(constants.stream().toArray());
List<TableToJoinBack> tableToJoinBackList = new ArrayList<>(lineages.size());
// Maps an index in the rootInput row type to the expression that has to be rewritten on top of the join backs.
Map<Integer, RexNode> rexNodesToShuttle = new HashMap<>(rootInput.getRowType().getFieldCount());
for (JoinedBackFields joinedBackFields : lineages) {
// Pick the first key of the table that is fully covered by the fields projected from that table.
Optional<ImmutableBitSet> projectedKeys = joinedBackFields.relOptHiveTable.getNonNullableKeys().stream().filter(joinedBackFields.fieldsInSourceTable::contains).findFirst();
// A table is joined back only if such a key exists and the projected fields are not exactly that key.
if (projectedKeys.isPresent() && !projectedKeys.get().equals(joinedBackFields.fieldsInSourceTable)) {
TableToJoinBack tableToJoinBack = new TableToJoinBack(projectedKeys.get(), joinedBackFields);
tableToJoinBackList.add(tableToJoinBack);
// Only the fields returned by getSource(...) for the chosen key are marked as used for this table.
fieldsUsed = fieldsUsed.union(joinedBackFields.getSource(projectedKeys.get()));
for (TableInputRefHolder mapping : joinedBackFields.mapping) {
// NOTE(review): fieldsUsed also contains everything accumulated from earlier lineages at this
// point, so the outcome of this check depends on the iteration order of 'lineages'.
if (!fieldsUsed.get(mapping.indexInOriginalRowType)) {
rexNodesToShuttle.put(mapping.indexInOriginalRowType, mapping.rexNode);
}
}
} else {
// Not a join back candidate: every field originating from this table stays in the original plan.
fieldsUsed = fieldsUsed.union(joinedBackFields.fieldsInOriginalRowType);
}
}
if (tableToJoinBackList.isEmpty()) {
LOG.debug("None of the tables has keys projected, unable to join back");
return root;
}
// 2. Trim out non-key fields of joined back tables
Set<RelDataTypeField> extraFields = Collections.emptySet();
TrimResult trimResult = dispatchTrimFields(rootInput, fieldsUsed, extraFields);
RelNode newInput = trimResult.left;
if (newInput.getRowType().equals(rootInput.getRowType())) {
LOG.debug("Nothing was trimmed out.");
return root;
}
// 3. Join back tables to the top of original plan
// trimResult.right is the mapping produced by the trim; it is used below to translate rootInput
// field indexes into indexes of the trimmed input.
Mapping newInputMapping = trimResult.right;
Map<RexTableInputRef, Integer> tableInputRefMapping = new HashMap<>();
for (TableToJoinBack tableToJoinBack : tableToJoinBackList) {
LOG.debug("Joining back table {}", tableToJoinBack.joinedBackFields.relOptHiveTable.getName());
// 3.1. Create new TableScan of tables to join back
RelOptHiveTable relOptTable = tableToJoinBack.joinedBackFields.relOptHiveTable;
RelOptCluster cluster = relBuilder.getCluster();
HiveTableScan tableScan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), relOptTable, relOptTable.getHiveTableMD().getTableName(), null, false, false);
// 3.2. Create Project with the required fields from this table
RelNode projectTableAccessRel = tableScan.project(tableToJoinBack.joinedBackFields.fieldsInSourceTable, new HashSet<>(0), REL_BUILDER.get());
// 3.3. Create mapping between the Project and TableScan: the n-th set bit of fieldsInSourceTable
// becomes the n-th field of the project
Mapping projectMapping = Mappings.create(MappingType.INVERSE_SURJECTION, tableScan.getRowType().getFieldCount(), tableToJoinBack.joinedBackFields.fieldsInSourceTable.cardinality());
int projectIndex = 0;
for (int i : tableToJoinBack.joinedBackFields.fieldsInSourceTable) {
projectMapping.set(i, projectIndex);
++projectIndex;
}
// Field count of the plan built so far; the fields of the joined back project are addressed
// starting at this offset.
int offset = newInput.getRowType().getFieldCount();
// 3.4. Map rexTableInputRef to the index where it can be found in the new Input row type
for (TableInputRefHolder mapping : tableToJoinBack.joinedBackFields.mapping) {
int indexInSourceTable = mapping.tableInputRef.getIndex();
if (!tableToJoinBack.keys.get(indexInSourceTable)) {
// 3.5. if this is not a key field it is shifted by the left input field count
tableInputRefMapping.put(mapping.tableInputRef, offset + projectMapping.getTarget(indexInSourceTable));
}
}
// 3.6. Create Join; the result becomes the left input of the next join back
relBuilder.push(newInput);
relBuilder.push(projectTableAccessRel);
RexNode joinCondition = joinCondition(newInput, newInputMapping, tableToJoinBack, projectTableAccessRel, projectMapping, rexBuilder);
newInput = relBuilder.join(JoinRelType.INNER, joinCondition).build();
}
// 4. Collect rexNodes for Project
TableInputRefMapper mapper = new TableInputRefMapper(tableInputRefMapping, rexBuilder, newInput);
List<RexNode> rexNodeList = new ArrayList<>(rootInput.getRowType().getFieldCount());
for (int i = 0; i < rootInput.getRowType().getFieldCount(); i++) {
RexNode rexNode = rexNodesToShuttle.get(i);
if (rexNode != null) {
// Expression recorded in step 1: rewrite it with the mapper built from tableInputRefMapping.
rexNodeList.add(mapper.apply(rexNode));
} else {
// Field kept by the trim: reference it at the position given by the trim mapping.
int target = newInputMapping.getTarget(i);
rexNodeList.add(rexBuilder.makeInputRef(newInput.getRowType().getFieldList().get(target).getType(), target));
}
}
// 5. Create Project on top of all Join backs, exposing the original column names
relBuilder.push(newInput);
relBuilder.project(rexNodeList, newColumnNames);
return root.copy(root.getTraitSet(), singletonList(relBuilder.build()));
} finally {
// Always clear the builder stored in REL_BUILDER, including on the early returns above.
REL_BUILDER.remove();
}
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveFilterJoinRule method classifyFilters.
/**
 * Sorts each filter into the place where it should be evaluated: the left
 * input of the join, the right input of the join, the join condition itself,
 * or (when none of these is allowed) its current position. Every filter that
 * is moved is removed from {@code filters}; filters pushed to an input are
 * first rewritten via {@code shiftFilter} so their field references are
 * relative to that input.
 *
 * <p>This is a temporary copy that must be dropped once [CALCITE-4499] is
 * fixed; the method fails fast with an {@link AssertionError} in that case.
 *
 * @param joinRel join node
 * @param filters filters to be classified; moved filters are removed from this list
 * @param pushInto whether filters can be pushed into the join
 * @param pushLeft true if filters can be pushed to the left
 * @param pushRight true if filters can be pushed to the right
 * @param joinFilters list of filters to push to the join
 * @param leftFilters list of filters to push to the left child
 * @param rightFilters list of filters to push to the right child
 * @return whether at least one filter was moved out of {@code filters}
 */
private static boolean classifyFilters(RelNode joinRel, List<RexNode> filters, boolean pushInto, boolean pushLeft, boolean pushRight, List<RexNode> joinFilters, List<RexNode> leftFilters, List<RexNode> rightFilters) {
if (Bug.CALCITE_4499_FIXED) {
throw new AssertionError("Remove this method when [CALCITE-4499] " + "has been fixed and use directly Calcite's RelOptUtil.classifyFilters.");
}
final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
final List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
final List<RelDataTypeField> leftFields = joinRel.getInputs().get(0).getRowType().getFieldList();
final List<RelDataTypeField> rightFields = joinRel.getInputs().get(1).getRowType().getFieldList();

// System fields are not taken from the join here; the count is fixed at zero.
final int sysFieldCount = 0;
final int totalFieldCount = leftFields.size() + rightFields.size();
// Index of the first field that belongs to the right input.
final int rightStart = sysFieldCount + leftFields.size();

// Field index ranges covered by the left and by the right input.
final ImmutableBitSet leftRange = ImmutableBitSet.range(sysFieldCount, rightStart);
final ImmutableBitSet rightRange = ImmutableBitSet.range(rightStart, totalFieldCount);

// Moved filters are collected here and removed after the loop, so the
// list is not modified while it is being iterated.
final List<RexNode> moved = new ArrayList<>();
for (RexNode filter : filters) {
final ImmutableBitSet referenced = InputFinder.analyze(filter).build();

// Case 1: every referenced column comes from the left input.
if (pushLeft && leftRange.contains(referenced)) {
// Filters that are always true are dropped instead of being pushed.
if (!filter.isAlwaysTrue()) {
leftFilters.add(shiftFilter(sysFieldCount, rightStart, -sysFieldCount, rexBuilder, joinFields, totalFieldCount, leftFields, filter));
}
moved.add(filter);
continue;
}

// Case 2: every referenced column comes from the right input. The
// references are shifted left by the number of fields preceding it.
if (pushRight && rightRange.contains(referenced)) {
if (!filter.isAlwaysTrue()) {
rightFilters.add(shiftFilter(rightStart, totalFieldCount, -rightStart, rexBuilder, joinFields, totalFieldCount, rightFields, filter));
}
moved.add(filter);
continue;
}

// Case 3: not pushable to an input; it may still go into the join
// condition, otherwise it stays where it is.
if (!pushInto) {
continue;
}
if (!joinFilters.contains(filter)) {
joinFilters.add(filter);
}
moved.add(filter);
}

if (moved.isEmpty()) {
return false;
}
filters.removeAll(moved);
return true;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveExpandDistinctAggregatesRule method createGroupingSets.
/**
 * Builds the aggregate that groups by one grouping set per distinct argument list.
 * Each grouping set is the group set of {@code aggregate} extended with the columns
 * of one argument list; argument lists that lead to an already seen grouping set are
 * not added again but recorded in {@code map}.
 *
 * @param aggregate the original aggregate
 * @param argList the original argList in aggregate
 * @param cleanArgList output: receives the argument lists without duplicates
 * @param map output: for every duplicated entry of {@code argList}, maps its index to the
 *     index of the grouping set first created for the same columns
 * @param groupSet new group set
 * @return a {@code HiveAggregate} over the input of {@code aggregate} with the collected
 *     grouping sets and a single grouping-id aggregate call
 */
private Aggregate createGroupingSets(Aggregate aggregate, List<List<Integer>> argList, List<List<Integer>> cleanArgList, Map<Integer, Integer> map, ImmutableBitSet groupSet) {
final List<ImmutableBitSet> groupingSets = new ArrayList<>();
for (int argIdx = 0; argIdx < argList.size(); argIdx++) {
final List<Integer> args = argList.get(argIdx);
final ImmutableBitSet candidate = aggregate.getGroupSet().union(ImmutableBitSet.of(args));
final int seenAt = groupingSets.indexOf(candidate);
if (seenAt != -1) {
// Same grouping set as an earlier argument list: only remember where it was first seen.
map.put(argIdx, seenAt);
continue;
}
groupingSets.add(candidate);
cleanArgList.add(args);
}

// Calcite expects the grouping sets sorted and without duplicates
groupingSets.sort(ImmutableBitSet.COMPARATOR);

// The only aggregate call is the GroupingID column: no arguments, no filter, BIGINT result.
final List<Integer> noArgs = new ImmutableList.Builder<Integer>().build();
final AggregateCall groupingIdCall = AggregateCall.create(HiveGroupingID.INSTANCE, false, noArgs, -1, cluster.getTypeFactory().createSqlType(SqlTypeName.BIGINT), HiveGroupingID.INSTANCE.getName());
final List<AggregateCall> aggCalls = new ArrayList<>();
aggCalls.add(groupingIdCall);

return new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregate.getInput(), groupSet, groupingSets, aggCalls);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveSortLimitVisitor method visit.
/**
 * Translates a {@code HiveSortLimit} into Hive operators. When the node carries a collation, a
 * reduce sink (with a single reducer) plus backtracking select is generated for the sort; when it
 * carries a fetch, a limit operator is added on top.
 *
 * @param sortRel the sort/limit node to translate
 * @return a clone of the input's {@code OpAttr} that wraps the last generated operator (or the
 *     input operator itself if nothing was generated)
 * @throws SemanticException if {@code StrictChecks.checkNoLimit} reports an error for a sort
 *     without fetch
 */
@Override
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
OpAttr inputOpAf = hiveOpConverter.dispatch(sortRel.getInput());

if (LOG.isDebugEnabled()) {
LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
final String consistsOf;
if (sortRel.getCollation() == RelCollations.EMPTY) {
consistsOf = " consists of limit";
} else if (sortRel.fetch == null) {
consistsOf = " consists of sort";
} else {
consistsOf = " consists of sort+limit";
}
LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + consistsOf);
}

// Both start out as the first operator of the translated input; resultOp is replaced as
// operators are stacked on top.
Operator<?> inputOp = inputOpAf.inputs.get(0);
Operator<?> resultOp = inputOp;

// 1. If we need to sort
if (sortRel.getCollation() != RelCollations.EMPTY) {
// In strict mode, in the presence of order by, limit must be specified.
if (sortRel.fetch == null) {
String strictError = StrictChecks.checkNoLimit(hiveOpConverter.getHiveConf());
if (strictError != null) {
throw new SemanticException(strictError);
}
}

// 1.a. Walk the collation: build the sort columns plus one order character and one
// null-order character per sort column.
Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
ImmutableBitSet.Builder sortPositions = ImmutableBitSet.builder();
ImmutableBitSet.Builder sortOutputPositions = ImmutableBitSet.builder();
List<ExprNodeDesc> sortCols = new ArrayList<>();
StringBuilder order = new StringBuilder();
StringBuilder nullOrder = new StringBuilder();
for (RelFieldCollation fieldCollation : sortRel.getCollation().getFieldCollations()) {
final int fieldPos = fieldCollation.getFieldIndex();
ColumnInfo colInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(fieldPos));
sortCols.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()));

final boolean descending = fieldCollation.getDirection() == RelFieldCollation.Direction.DESCENDING;
order.append(descending ? "-" : "+");

final String nullMarker;
if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) {
nullMarker = "a";
} else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) {
nullMarker = "z";
} else {
// Default: follows the sort direction
nullMarker = descending ? "z" : "a";
}
nullOrder.append(nullMarker);

if (obRefToCallMap == null) {
continue;
}
// Every sort column is recorded; it is additionally recorded as an output column when
// the map holds no expression for it.
sortPositions.set(fieldPos);
if (obRefToCallMap.get(fieldPos) == null) {
sortOutputPositions.set(fieldPos);
}
}

// Keep a column when it is flagged in both bit sets or in neither of them.
final ImmutableBitSet sortColsPos = sortPositions.build();
final ImmutableBitSet sortOutputColsPos = sortOutputPositions.build();
final List<ColumnInfo> inputSignature = inputOp.getSchema().getSignature();
List<String> keepColumns = new ArrayList<>();
for (int pos = 0; pos < inputSignature.size(); pos++) {
if (sortColsPos.get(pos) == sortOutputColsPos.get(pos)) {
keepColumns.add(inputSignature.get(pos).getInternalName());
}
}

// 1.b. Generate reduce sink and project operator; order by uses only 1 reducer
final int numReducers = 1;
ExprNodeDesc[] sortKeys = sortCols.toArray(new ExprNodeDesc[sortCols.size()]);
resultOp = HiveOpConverterUtils.genReduceSinkAndBacktrackSelect(resultOp, sortKeys, 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveOpConverter.getHiveConf(), keepColumns);
}

// 2. If we need to generate limit
if (sortRel.fetch != null) {
final int limit = RexLiteral.intValue(sortRel.fetch);
final int offset;
if (sortRel.offset == null) {
offset = 0;
} else {
offset = RexLiteral.intValue(sortRel.offset);
}
ArrayList<ColumnInfo> limitColInfos = HiveOpConverterUtils.createColInfos(resultOp);
resultOp = OperatorFactory.getAndMakeChild(new LimitDesc(offset, limit), new RowSchema(limitColInfos), resultOp);
if (LOG.isDebugEnabled()) {
LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
}
}

// 3. Return result
return inputOpAf.clone(resultOp);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class ConstraintExprGenerator method getNotNullConstraintExpr.
/**
 * Builds the expression that enforces the enabled NOT NULL constraints of {@code targetTable}:
 * an {@code isnotnull} call for every constrained column, combined with {@code and}.
 *
 * @param targetTable table whose enabled NOT NULL constraints are looked up
 * @param inputRR row resolver providing the column infos the expression refers to
 * @param isUpdateStatement whether the statement is an update; in that case the first input
 *     column (the row id) is skipped and constraint positions start at the second column
 * @return the combined expression, or {@code null} when enforcement is switched off, when
 *     {@code getEnabledNotNullConstraints} returns {@code null}, or when none of the input
 *     columns is constrained
 * @throws SemanticException if looking up the constraints fails with a {@code SemanticException};
 *     any other exception from the lookup is rethrown wrapped in a {@link RuntimeException}
 */
private T getNotNullConstraintExpr(Table targetTable, RowResolver inputRR, boolean isUpdateStatement) throws SemanticException {
if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT)) {
return null;
}
ImmutableBitSet nullConstraintBitSet;
try {
nullConstraintBitSet = getEnabledNotNullConstraints(targetTable);
} catch (SemanticException e) {
// Propagate as-is so it is not wrapped by the generic handler below.
throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
if (nullConstraintBitSet == null) {
return null;
}
List<ColumnInfo> inputColInfos = inputRR.getColumnInfos();
// for updates first column is _rowid, so constraint position 0 corresponds to input column 1
final int firstColExprIdx = isUpdateStatement ? 1 : 0;
T currUDF = null;
for (int colExprIdx = firstColExprIdx, constraintIdx = 0; colExprIdx < inputColInfos.size(); colExprIdx++, constraintIdx++) {
// Direct bit lookup; equivalent to indexOf(constraintIdx) != -1 without scanning the set bits
// for every column.
if (!nullConstraintBitSet.get(constraintIdx)) {
continue;
}
T currExpr = typeCheckProcFactory.exprFactory.createColumnRefExpr(inputColInfos.get(colExprIdx), inputRR, 0);
T isNotNullUDF = exprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
currUDF = currUDF == null ? isNotNullUDF : exprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
}
return currUDF;
}
Aggregations