use of org.apache.calcite.plan.RelOptCluster in project hive by apache.
the class HiveRelDecorrelator method decorrelateQuery.
// ~ Methods ----------------------------------------------------------------
/**
* Decorrelates a query.
*
* <p>This is the main entry point to {@code RelDecorrelator}.
*
* @param rootRel Root node of the query
*
* @return Equivalent query with all
* {@link org.apache.calcite.rel.logical.LogicalCorrelate} instances removed
*/
public static RelNode decorrelateQuery(RelNode rootRel) {
final CorelMap corelMap = new CorelMapBuilder().build(rootRel);
if (!corelMap.hasCorrelation()) {
return rootRel;
}
final RelOptCluster cluster = rootRel.getCluster();
final HiveRelDecorrelator decorrelator = new HiveRelDecorrelator(cluster, corelMap, cluster.getPlanner().getContext());
RelNode newRootRel = decorrelator.removeCorrelationViaRule(rootRel);
if (!decorrelator.cm.mapCorToCorRel.isEmpty()) {
newRootRel = decorrelator.decorrelate(newRootRel);
}
return newRootRel;
}
use of org.apache.calcite.plan.RelOptCluster in project hive by apache.
the class HiveRelDecorrelator method createValueGenerator.
/**
* Create RelNode tree that produces a list of correlated variables.
*/
private RelNode createValueGenerator(Iterable<CorRef> correlations, int valueGenFieldOffset, SortedMap<CorDef, Integer> corDefOutputs) {
final Map<RelNode, List<Integer>> mapNewInputToOutputs = new HashMap<>();
final Map<RelNode, Integer> mapNewInputToNewOffset = new HashMap<>();
// Add to map all the referenced positions (relative to each input rel).
for (CorRef corVar : correlations) {
final int oldCorVarOffset = corVar.field;
final RelNode oldInput = getCorRel(corVar);
assert oldInput != null;
final Frame frame = map.get(oldInput);
assert frame != null;
final RelNode newInput = frame.r;
final List<Integer> newLocalOutputs;
if (!mapNewInputToOutputs.containsKey(newInput)) {
newLocalOutputs = new ArrayList<>();
} else {
newLocalOutputs = mapNewInputToOutputs.get(newInput);
}
final int newCorVarOffset = frame.oldToNewOutputs.get(oldCorVarOffset);
// Add all unique positions referenced.
if (!newLocalOutputs.contains(newCorVarOffset)) {
newLocalOutputs.add(newCorVarOffset);
}
mapNewInputToOutputs.put(newInput, newLocalOutputs);
}
int offset = 0;
// Project only the correlated fields out of each inputRel
// and join the projectRel together.
// To make sure the plan does not change in terms of join order,
// join these rels based on their occurrence in cor var list which
// is sorted.
final Set<RelNode> joinedInputs = new HashSet<>();
RelNode r = null;
for (CorRef corVar : correlations) {
final RelNode oldInput = getCorRel(corVar);
assert oldInput != null;
final RelNode newInput = map.get(oldInput).r;
assert newInput != null;
if (!joinedInputs.contains(newInput)) {
RelNode project = RelOptUtil.createProject(HiveRelFactories.HIVE_PROJECT_FACTORY, newInput, mapNewInputToOutputs.get(newInput));
RelNode distinct = relBuilder.push(project).distinct().build();
RelOptCluster cluster = distinct.getCluster();
joinedInputs.add(newInput);
mapNewInputToNewOffset.put(newInput, offset);
offset += distinct.getRowType().getFieldCount();
if (r == null) {
r = distinct;
} else {
r = relBuilder.push(r).push(distinct).join(JoinRelType.INNER, cluster.getRexBuilder().makeLiteral(true)).build();
}
}
}
// referencing correlated variables.
for (CorRef corRef : correlations) {
// The first input of a Correlator is always the rel defining
// the correlated variables.
final RelNode oldInput = getCorRel(corRef);
assert oldInput != null;
final Frame frame = map.get(oldInput);
final RelNode newInput = frame.r;
assert newInput != null;
final List<Integer> newLocalOutputs = mapNewInputToOutputs.get(newInput);
final int newLocalOutput = frame.oldToNewOutputs.get(corRef.field);
// newOutput is the index of the cor var in the referenced
// position list plus the offset of referenced position list of
// each newInput.
final int newOutput = newLocalOutputs.indexOf(newLocalOutput) + mapNewInputToNewOffset.get(newInput) + valueGenFieldOffset;
corDefOutputs.put(corRef.def(), newOutput);
}
return r;
}
use of org.apache.calcite.plan.RelOptCluster in project hive by apache.
the class RelFieldTrimmer method dummyProject.
/**
* Creates a project with a dummy column, to protect the parts of the system
* that cannot handle a relational expression with no columns.
*
* @param fieldCount Number of fields in the original relational expression
* @param input Trimmed input
* @return Dummy project, or null if no dummy is required
*/
protected TrimResult dummyProject(int fieldCount, RelNode input) {
final RelOptCluster cluster = input.getCluster();
final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, 1);
if (input.getRowType().getFieldCount() == 1) {
// created for the child). We can't do better.
return result(input, mapping);
}
final RexLiteral expr = cluster.getRexBuilder().makeExactLiteral(BigDecimal.ZERO);
final RelBuilder relBuilder = REL_BUILDER.get();
relBuilder.push(input);
relBuilder.project(ImmutableList.<RexNode>of(expr), ImmutableList.of("DUMMY"));
return result(relBuilder.build(), mapping);
}
use of org.apache.calcite.plan.RelOptCluster in project hive by apache.
the class HiveIntersectRewriteRule method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
final HiveIntersect hiveIntersect = call.rel(0);
final RelOptCluster cluster = hiveIntersect.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
int numOfBranch = hiveIntersect.getInputs().size();
Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
// 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
for (int index = 0; index < numOfBranch; index++) {
RelNode input = hiveIntersect.getInputs().get(index);
final List<RexNode> gbChildProjLst = Lists.newArrayList();
final List<Integer> groupSetPositions = Lists.newArrayList();
for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
groupSetPositions.add(cInd);
}
gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
// create the project before GB because we need a new project with extra column '1'.
RelNode gbInputRel = null;
try {
gbInputRel = HiveProject.create(input, gbChildProjLst, null);
} catch (CalciteSemanticException e) {
LOG.debug(e.toString());
throw new RuntimeException(e);
}
// groupSetPosition includes all the positions
final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
List<AggregateCall> aggregateCalls = Lists.newArrayList();
RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
// count(1), 1's position is input.getRowType().getFieldList().size()
AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
aggregateCalls.add(aggregateCall);
HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
bldr.add(aggregateRel);
}
// create a union above all the branches
HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
// 2nd level GB: create a GB (col0, col1, count(c)) for each branch
final List<Integer> groupSetPositions = Lists.newArrayList();
// the index of c
int cInd = union.getRowType().getFieldList().size() - 1;
for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
if (index != cInd) {
groupSetPositions.add(index);
}
}
List<AggregateCall> aggregateCalls = Lists.newArrayList();
RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
aggregateCalls.add(aggregateCall);
if (hiveIntersect.all) {
aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
aggregateCalls.add(aggregateCall);
}
final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), union, groupSet, null, aggregateCalls);
// add a filter count(c) = #branches
int countInd = cInd;
List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
childRexNodeLst.add(ref);
childRexNodeLst.add(literal);
ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
RexNode factoredFilterExpr = null;
try {
factoredFilterExpr = rexBuilder.makeCall(SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(), TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true, false), childRexNodeLst);
} catch (CalciteSemanticException e) {
LOG.debug(e.toString());
throw new RuntimeException(e);
}
RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, factoredFilterExpr);
if (!hiveIntersect.all) {
// the schema for intersect distinct is like this
// R3 on all attributes + count(c) as cnt
// finally add a project to project out the last column
Set<Integer> projectOutColumnPositions = new HashSet<>();
projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
try {
call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
} catch (CalciteSemanticException e) {
LOG.debug(e.toString());
throw new RuntimeException(e);
}
} else {
// the schema for intersect all is like this
// R3 + count(c) as cnt + min(c) as m
// we create a input project for udtf whose schema is like this
// min(c) as m + R3
List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(), new Function<RelDataTypeField, RexNode>() {
@Override
public RexNode apply(RelDataTypeField input) {
return new RexInputRef(input.getIndex(), input.getType());
}
});
List<RexNode> copyInputRefs = new ArrayList<>();
copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
for (int i = 0; i < originalInputRefs.size() - 2; i++) {
copyInputRefs.add(originalInputRefs.get(i));
}
RelNode srcRel = null;
try {
srcRel = HiveProject.create(filterRel, copyInputRefs, null);
HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
// finally add a project to project out the 1st column
Set<Integer> projectOutColumnPositions = new HashSet<>();
projectOutColumnPositions.add(0);
call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
} catch (SemanticException e) {
LOG.debug(e.toString());
throw new RuntimeException(e);
}
}
}
use of org.apache.calcite.plan.RelOptCluster in project hive by apache.
the class HiveProjectSortExchangeTransposeRule method onMatch.
// ~ Methods ----------------------------------------------------------------
// implement RelOptRule
public void onMatch(RelOptRuleCall call) {
final HiveProject project = call.rel(0);
final HiveSortExchange sortExchange = call.rel(1);
final RelOptCluster cluster = project.getCluster();
List<RelFieldCollation> fieldCollations = getNewRelFieldCollations(project, sortExchange.getCollation(), cluster);
if (fieldCollations == null) {
return;
}
RelCollation newCollation = RelCollationTraitDef.INSTANCE.canonize(RelCollationImpl.of(fieldCollations));
List<Integer> newDistributionKeys = getNewRelDistributionKeys(project, sortExchange.getDistribution());
RelDistribution newDistribution = RelDistributionTraitDef.INSTANCE.canonize(new HiveRelDistribution(sortExchange.getDistribution().getType(), newDistributionKeys));
RelTraitSet newTraitSet = TraitsUtil.getDefaultTraitSet(sortExchange.getCluster()).replace(newCollation).replace(newDistribution);
// New operators
final RelNode newProject = project.copy(sortExchange.getInput().getTraitSet(), ImmutableList.of(sortExchange.getInput()));
final SortExchange newSort = sortExchange.copy(newTraitSet, newProject, newDistribution, newCollation);
call.transformTo(newSort);
}
Aggregations