Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion in project hive by apache.
The class HiveSortUnionReduceRule, method onMatch.

public void onMatch(RelOptRuleCall call) {
  final HiveSortLimit sort = call.rel(0);
  final HiveUnion union = call.rel(1);
  List<RelNode> inputs = new ArrayList<>();
  // 'finishPushSortPastUnion' tracks whether there is anything left to do:
  // it stays true only if no branch of the union was rewritten.
  boolean finishPushSortPastUnion = true;
  final int offset = sort.offset == null ? 0 : RexLiteral.intValue(sort.offset);
  for (RelNode input : union.getInputs()) {
    // Push the sort into a branch only if doing so would actually reduce the
    // branch's size; otherwise keep the branch unchanged.
    if (RexLiteral.intValue(sort.fetch) + offset < call.getMetadataQuery().getRowCount(input)) {
      finishPushSortPastUnion = false;
      // Rewrite the branch: the new fetch is the sum of offset and fetch, and
      // the branch-level sort carries that fetch but no offset. The offset can
      // only be applied by the final sort above the union.
      RexNode fetchRN = sort.getCluster().getRexBuilder()
          .makeExactLiteral(BigDecimal.valueOf(RexLiteral.intValue(sort.fetch) + offset));
      HiveSortLimit branchSort = sort.copy(sort.getTraitSet(), input, sort.getCollation(), null, fetchRN);
      branchSort.setRuleCreated(true);
      inputs.add(branchSort);
    } else {
      inputs.add(input);
    }
  }
  // There is nothing to change.
  if (finishPushSortPastUnion) {
    return;
  }
  // Create the new union and the final sort above it.
  HiveUnion unionCopy = (HiveUnion) union.copy(union.getTraitSet(), inputs, union.all);
  HiveSortLimit result = sort.copy(sort.getTraitSet(), unionCopy, sort.getCollation(), sort.offset, sort.fetch);
  call.transformTo(result);
}
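
To see the transformation end to end, here is a minimal sketch that drives the rule through Calcite's HepPlanner. The before/after plan shapes in the comments are illustrative, and the INSTANCE singleton is an assumption (most Hive rules expose one; otherwise construct the rule directly):

import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;

public class SortUnionReduceDriver {
  // Before: HiveSortLimit(offset=5, fetch=10)
  //           HiveUnion(all)
  //             scan(a), scan(b)        -- both larger than 15 rows
  // After:  HiveSortLimit(offset=5, fetch=10)
  //           HiveUnion(all)
  //             HiveSortLimit(fetch=15) -- offset + fetch pushed, no offset
  //               scan(a)
  //             HiveSortLimit(fetch=15)
  //               scan(b)
  public static RelNode pushSortPastUnion(RelNode root) {
    HepProgramBuilder program = new HepProgramBuilder();
    program.addRuleInstance(HiveSortUnionReduceRule.INSTANCE); // assumed singleton
    HepPlanner planner = new HepPlanner(program.build());
    planner.setRoot(root);
    return planner.findBestExp();
  }
}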
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion in project hive by apache.
The class HiveUnionMergeRule, method onMatch.

// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
  final HiveUnion topUnion = call.rel(0);
  final HiveUnion bottomUnion;
  // The nested union may have matched as either the second or the first
  // input of the top union.
  if (call.rel(2) instanceof HiveUnion) {
    bottomUnion = call.rel(2);
  } else if (call.rel(1) instanceof HiveUnion) {
    bottomUnion = call.rel(1);
  } else {
    return;
  }
  List<RelNode> inputs = new ArrayList<>();
  if (call.rel(2) instanceof HiveUnion) {
    // Bottom union is the second input: keep every top input except the
    // nested union itself, then append the nested union's branches.
    for (int i = 0; i < topUnion.getInputs().size(); i++) {
      if (i != 1) {
        inputs.add(topUnion.getInput(i));
      }
    }
    inputs.addAll(bottomUnion.getInputs());
  } else {
    // Bottom union is the first input: its branches come first, followed by
    // the remaining top inputs (Util.skip drops the first element).
    inputs.addAll(bottomUnion.getInputs());
    inputs.addAll(Util.skip(topUnion.getInputs()));
  }
  HiveUnion newUnion = (HiveUnion) topUnion.copy(topUnion.getTraitSet(), inputs, true);
  call.transformTo(newUnion);
}
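
Note that in the first case the nested union's branches are appended at the end rather than spliced in at position 1; for UNION ALL this is safe because branch order does not affect the multiset result. A minimal sketch of the same splicing logic on plain strings (illustrative stand-ins for the RelNode inputs):

import java.util.ArrayList;
import java.util.List;

public class UnionMergeSplice {
  public static void main(String[] args) {
    // Stand-ins: topUnion.getInputs() = [A, U], where U is the nested union
    // with inputs [B, C]; this mirrors the "i != 1" branch above.
    List<String> topInputs = List.of("A", "U");
    List<String> bottomInputs = List.of("B", "C");
    List<String> merged = new ArrayList<>();
    for (int i = 0; i < topInputs.size(); i++) {
      if (i != 1) { // skip the nested union at position 1
        merged.add(topInputs.get(i));
      }
    }
    merged.addAll(bottomInputs);
    System.out.println(merged); // [A, B, C]
  }
}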
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion in project hive by apache.
The class HiveUnionSimpleSelectsToInlineTableRule, method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
  RexBuilder rexBuilder = call.builder().getRexBuilder();
  final HiveUnion union = call.rel(0);
  if (!union.all) {
    return;
  }
  List<RelNode> inputs = new ArrayList<RelNode>();
  List<Project> projects = new ArrayList<>();
  List<HiveTableFunctionScan> inlineTables = new ArrayList<>();
  for (RelNode input : union.getInputs()) {
    input = HiveRelDecorrelator.stripHep(input);
    if (isPlainProject(input)) {
      projects.add((Project) input);
      continue;
    }
    if (isInlineTableOperand(input)) {
      inlineTables.add((HiveTableFunctionScan) input);
      continue;
    }
    inputs.add(input);
  }
  if (projects.size() + inlineTables.size() <= 1) {
    // nothing to do
    return;
  }
  RowStorage newRows = new RowStorage();
  for (HiveTableFunctionScan rel : inlineTables) {
    // inline(array(row1, row2, ...))
    RexCall rex = (RexCall) ((RexCall) rel.getCall()).operands.get(0);
    for (RexNode row : rex.operands) {
      if (!(row.getType() instanceof RelRecordType)) {
        return;
      }
      newRows.addRow(row);
    }
  }
  for (Project proj : projects) {
    RexNode row = rexBuilder.makeCall(SqlStdOperatorTable.ROW, proj.getProjects());
    if (!(row.getType() instanceof RelRecordType)) {
      return;
    }
    newRows.addRow(row);
  }
  if (newRows.keySet().size() + inputs.size() == union.getInputs().size()) {
    // nothing to do
    return;
  }
  if (dummyTable == null) {
    LOG.warn("Unexpected; rule would match - but dummyTable is not available");
    return;
  }
  for (RelRecordType type : newRows.keySet()) {
    List<RexNode> rows = newRows.get(type);
    RelDataType arrayType = rexBuilder.getTypeFactory().createArrayType(type, -1);
    try {
      SqlOperator inlineFn =
          SqlFunctionConverter.getCalciteFn("inline", Collections.singletonList(arrayType), type, true, false);
      SqlOperator arrayFn =
          SqlFunctionConverter.getCalciteFn("array", Collections.nCopies(rows.size(), type), arrayType, true, false);
      RexNode expr = rexBuilder.makeCall(arrayFn, rows);
      expr = rexBuilder.makeCall(inlineFn, expr);
      RelNode newInlineTable = buildTableFunctionScan(expr, union.getCluster());
      inputs.add(newInlineTable);
    } catch (CalciteSemanticException e) {
      LOG.debug("Conversion failed with exception", e);
      return;
    }
  }
  if (inputs.size() > 1) {
    HiveUnion newUnion = (HiveUnion) union.copy(union.getTraitSet(), inputs, true);
    call.transformTo(newUnion);
  } else {
    call.transformTo(inputs.get(0));
  }
}
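
RowStorage above is keyed by row type: rows are bucketed by their RelRecordType so that each generated inline table stays homogeneous. A minimal sketch of that idea (class name and generic parameters are stand-ins; the real RowStorage derives the key from row.getType() rather than taking it as an argument):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

// K stands in for RelRecordType, R for RexNode.
class RowStorageSketch<K, R> {
  private final Map<K, List<R>> rowsByType = new LinkedHashMap<>();

  void addRow(K type, R row) {
    // Bucket the row under its record type, creating the bucket on demand.
    rowsByType.computeIfAbsent(type, t -> new ArrayList<>()).add(row);
  }

  Set<K> keySet() {
    return rowsByType.keySet();
  }

  List<R> get(K type) {
    return rowsByType.get(type);
  }
}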
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion in project hive by apache.
The class HiveIntersectRewriteRule, method onMatch.

// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
  final HiveIntersect hiveIntersect = call.rel(0);
  final RelOptCluster cluster = hiveIntersect.getCluster();
  final RexBuilder rexBuilder = cluster.getRexBuilder();
  int numOfBranch = hiveIntersect.getInputs().size();
  Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
  // 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
  for (int index = 0; index < numOfBranch; index++) {
    RelNode input = hiveIntersect.getInputs().get(index);
    final List<RexNode> gbChildProjLst = Lists.newArrayList();
    final List<Integer> groupSetPositions = Lists.newArrayList();
    for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
      gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
      groupSetPositions.add(cInd);
    }
    gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
    // Create the project below the GB because we need a new project with the extra column '1'.
    RelNode gbInputRel = null;
    try {
      gbInputRel = HiveProject.create(input, gbChildProjLst, null);
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    // groupSetPositions includes all the positions
    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    // count(1); the literal '1' sits at position input.getRowType().getFieldList().size()
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
        TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
    aggregateCalls.add(aggregateCall);
    HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
        gbInputRel, groupSet, null, aggregateCalls);
    bldr.add(aggregateRel);
  }
  // create a union above all the branches
  HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
  // 2nd level GB: create a GB (col0, col1, count(c)) over the union
  final List<Integer> groupSetPositions = Lists.newArrayList();
  // the index of c
  int cInd = union.getRowType().getFieldList().size() - 1;
  for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
    if (index != cInd) {
      groupSetPositions.add(index);
    }
  }
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
  AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
      TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  if (hiveIntersect.all) {
    aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster,
        TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
    aggregateCalls.add(aggregateCall);
  }
  final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
  HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
      union, groupSet, null, aggregateCalls);
  // add a filter count(c) = numOfBranch
  int countInd = cInd;
  List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
  RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
  RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
  childRexNodeLst.add(ref);
  childRexNodeLst.add(literal);
  ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
  calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
  calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
  RexNode factoredFilterExpr = null;
  try {
    factoredFilterExpr = rexBuilder.makeCall(
        SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(),
            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true, false),
        childRexNodeLst);
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
      aggregateRel, factoredFilterExpr);
  if (!hiveIntersect.all) {
    // The schema for INTERSECT DISTINCT at this point is:
    //   R3 on all attributes + count(c) as cnt
    // Finally, add a project to project out the last column.
    Set<Integer> projectOutColumnPositions = new HashSet<>();
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
    try {
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  } else {
    // The schema for INTERSECT ALL at this point is:
    //   R3 + count(c) as cnt + min(c) as m
    // We create an input project for the UDTF whose schema is:
    //   min(c) as m + R3
    List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(),
        new Function<RelDataTypeField, RexNode>() {
          @Override
          public RexNode apply(RelDataTypeField input) {
            return new RexInputRef(input.getIndex(), input.getType());
          }
        });
    List<RexNode> copyInputRefs = new ArrayList<>();
    copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
    for (int i = 0; i < originalInputRefs.size() - 2; i++) {
      copyInputRefs.add(originalInputRefs.get(i));
    }
    RelNode srcRel = null;
    try {
      srcRel = HiveProject.create(filterRel, copyInputRefs, null);
      HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
      // finally add a project to project out the 1st column
      Set<Integer> projectOutColumnPositions = new HashSet<>();
      projectOutColumnPositions.add(0);
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
    } catch (SemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  }
}
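
The shape of this rewrite is easiest to read as SQL. Below is a hypothetical helper that spells out the INTERSECT DISTINCT plan built above for n branches; it is illustrative only, since the rule assembles the RelNode tree directly rather than going through SQL:

import java.util.List;
import java.util.stream.Collectors;

public class IntersectRewriteSketch {
  // For branches = [r1, r2] and cols = "a, b", the result keeps exactly the
  // (a, b) tuples present in both r1 and r2: the 1st level GB emits one row
  // per distinct tuple per branch, so after the union, count(c) equals the
  // number of branches containing that tuple.
  static String intersectDistinctAsSql(List<String> branches, String cols) {
    String firstLevel = branches.stream()
        .map(t -> "SELECT " + cols + ", count(1) AS c FROM " + t + " GROUP BY " + cols)
        .collect(Collectors.joining("\nUNION ALL\n"));
    return "SELECT " + cols + " FROM (\n" + firstLevel + "\n) t\n"
        + "GROUP BY " + cols + "\nHAVING count(c) = " + branches.size();
  }
}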
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion in project hive by apache.
The class JDBCUnionPushDownRule, method matches.

@Override
public boolean matches(RelOptRuleCall call) {
  final HiveUnion union = call.rel(0);
  final HiveJdbcConverter converter1 = call.rel(1);
  final HiveJdbcConverter converter2 = call.rel(2);
  // First we compare the convention
  if (!converter1.getJdbcConvention().getName().equals(converter2.getJdbcConvention().getName())) {
    return false;
  }
  // Second, we compare the connection string
  if (!converter1.getConnectionUrl().equals(converter2.getConnectionUrl())) {
    return false;
  }
  // Third, we compare the connection user
  if (!converter1.getConnectionUser().equals(converter2.getConnectionUser())) {
    return false;
  }
  return union.getInputs().size() == 2;
}
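
The three equals() checks assume non-null convention names, URLs, and users. A null-tolerant variant of the same "same JDBC source" test, as a sketch (whether any of these getters can return null is our assumption, not something the rule states):

import java.util.Objects;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;

public class JdbcSourceCheck {
  // Objects.equals treats two nulls as equal and never throws, so this is
  // safe even if a connection URL or user is absent.
  static boolean sameJdbcSource(HiveJdbcConverter c1, HiveJdbcConverter c2) {
    return Objects.equals(c1.getJdbcConvention().getName(), c2.getJdbcConvention().getName())
        && Objects.equals(c1.getConnectionUrl(), c2.getConnectionUrl())
        && Objects.equals(c1.getConnectionUser(), c2.getConnectionUser());
  }
}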