use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveSortLimitVisitor method visit.
@Override
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
  OpAttr inputOpAf = hiveOpConverter.dispatch(sortRel.getInput());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName()
        + " with row type: [" + sortRel.getRowType() + "]");
    if (sortRel.getCollation() == RelCollations.EMPTY) {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
    } else if (sortRel.fetch == null) {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
    } else {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
    }
  }
  Operator<?> inputOp = inputOpAf.inputs.get(0);
  Operator<?> resultOp = inputOpAf.inputs.get(0);
  // 1. If we need to sort tuples based on the value of some of their columns
  if (sortRel.getCollation() != RelCollations.EMPTY) {
    // In strict mode, in the presence of order by, limit must be specified.
    if (sortRel.fetch == null) {
      String error = StrictChecks.checkNoLimit(hiveOpConverter.getHiveConf());
      if (error != null) {
        throw new SemanticException(error);
      }
    }
    // 1.a. Extract order for each column from collation
    //      Generate sortCols and order
    ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
    ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
    Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
    List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
      int sortColumnPos = sortInfo.getFieldIndex();
      ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
      ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(),
          columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
      sortCols.add(sortColumn);
      if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
        order.append("-");
      } else {
        order.append("+");
      }
      if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
        nullOrder.append("a");
      } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
        nullOrder.append("z");
      } else {
        // Default: nulls first for ascending keys, nulls last for descending keys
        nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
      }
      if (obRefToCallMap != null) {
        RexNode obExpr = obRefToCallMap.get(sortColumnPos);
        sortColsPosBuilder.set(sortColumnPos);
        if (obExpr == null) {
          sortOutputColsPosBuilder.set(sortColumnPos);
        }
      }
    }
    // Use only 1 reducer for order by
    int numReducers = 1;
    // Keep only the columns that are part of the final output
    List<String> keepColumns = new ArrayList<String>();
    final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
    final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
    final List<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
    for (int pos = 0; pos < inputSchema.size(); pos++) {
      if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos))
          || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
        keepColumns.add(inputSchema.get(pos).getInternalName());
      }
    }
    // 1.b. Generate reduce sink and project operator
    resultOp = HiveOpConverterUtils.genReduceSinkAndBacktrackSelect(resultOp,
        sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(),
        order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID,
        hiveOpConverter.getHiveConf(), keepColumns);
  }
  // 2. If we need to generate limit
  if (sortRel.fetch != null) {
    int limit = RexLiteral.intValue(sortRel.fetch);
    int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
    LimitDesc limitDesc = new LimitDesc(offset, limit);
    ArrayList<ColumnInfo> cinfoLst = HiveOpConverterUtils.createColInfos(resultOp);
    resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
    }
  }
  // 3. Return result
  return inputOpAf.clone(resultOp);
}
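The "+"/"-" and "a"/"z" strings built above are Hive's compact encoding of sort direction and null ordering per key, later consumed by the generated ReduceSink. A minimal standalone sketch of that encoding using only the Calcite collation API (the CollationEncoder class name is ours, not Hive's):

import java.util.List;
import org.apache.calcite.rel.RelFieldCollation;

public class CollationEncoder {
  // Returns {order, nullOrder}: '+'/'-' per ASC/DESC key, 'a'/'z' per NULLS FIRST/LAST
  static String[] encode(List<RelFieldCollation> collations) {
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    for (RelFieldCollation c : collations) {
      boolean desc = c.getDirection() == RelFieldCollation.Direction.DESCENDING;
      order.append(desc ? '-' : '+');
      switch (c.nullDirection) {
        case FIRST: nullOrder.append('a'); break;
        case LAST:  nullOrder.append('z'); break;
        default:    nullOrder.append(desc ? 'z' : 'a'); // same default as the visitor above
      }
    }
    return new String[] { order.toString(), nullOrder.toString() };
  }
}

For example, ORDER BY a ASC, b DESC NULLS FIRST would yield order "+-" and nullOrder "aa".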
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class JoinVisitor method visit.
@Override
OpAttr visit(RelNode joinRel) throws SemanticException {
  // 0. Additional data structures needed for the join optimization
  //    through Hive
  String[] baseSrc = new String[joinRel.getInputs().size()];
  String tabAlias = hiveOpConverter.getHiveDerivedTableAlias();
  // 1. Convert inputs
  OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
  List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = hiveOpConverter.dispatch(joinRel.getInput(i));
    children.add(inputs[i].inputs.get(0));
    baseSrc[i] = inputs[i].tabAlias;
  }
  // 2. Generate tags
  for (int tag = 0; tag < children.size(); tag++) {
    ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
    reduceSinkOp.getConf().setTag(tag);
  }
  // 3. Virtual columns
  Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
  newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
  if (joinRel instanceof HiveMultiJoin || !((joinRel instanceof Join)
      && ((((Join) joinRel).isSemiJoin()) || (((Join) joinRel).getJoinType() == JoinRelType.ANTI)))) {
    // Join types that project columns from all inputs: shift the virtual
    // column positions of each input after the first by the cumulative schema size
    int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int i = 1; i < inputs.length; i++) {
      newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
      shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName()
        + " with row type: [" + joinRel.getRowType() + "]");
  }
  // 4. Extract join key expressions from HiveSortExchange
  ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
  for (int i = 0; i < inputs.length; i++) {
    joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getKeyExpressions();
  }
  // 5. Extract the rest of the join predicate info. We infer the remaining join
  // conditions that will be added to the filters (join conditions that are not
  // part of the join key)
  List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveAntiJoin) {
    joinFilters = ImmutableList.of(((HiveAntiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }
  List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
  for (int i = 0; i < joinFilters.size(); i++) {
    List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
    if (joinFilters.get(i) != null) {
      for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
        ExprNodeDesc expr = HiveOpConverterUtils.convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
        filterExpressionsForInput.add(expr);
      }
    }
    filterExpressions.add(filterExpressionsForInput);
  }
  // 6. Generate Join operator
  JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
  // 7. Return result
  return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
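Step 5 relies on RelOptUtil.conjunctions to split a non-equi join predicate into its top-level AND-ed conjuncts before each is converted to an ExprNodeDesc. A self-contained illustration of that call (the demo class and the two boolean input refs standing in for sub-predicates are ours):

import java.util.List;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeName;

public class ConjunctionsDemo {
  public static void main(String[] args) {
    RexBuilder rex = new RexBuilder(new JavaTypeFactoryImpl());
    // Two boolean input refs standing in for join-filter sub-predicates
    RexNode a = rex.makeInputRef(rex.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN), 0);
    RexNode b = rex.makeInputRef(rex.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN), 1);
    RexNode filter = rex.makeCall(SqlStdOperatorTable.AND, a, b);
    List<RexNode> conjuncts = RelOptUtil.conjunctions(filter);
    System.out.println(conjuncts); // [$0, $1]; the visitor converts each conjunct separately
  }
}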
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genReduceGBRS.
private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
  // Forward the GB keys of the first reduce-side GroupBy as reduce keys
  ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0,
      gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true);
  // Propagate the grouping-set column if grouping sets reach the reduce side
  if (inclGrpSetInReduceSide(gbInfo)) {
    addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true,
        reduceKeys, outputColumnNames, colInfoLst, colExprMap);
  }
  // Aggregation buffers travel as reduce values
  ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1,
      reduceSideGB1.getConf().getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true);
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
          getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo),
          AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder),
      new RowSchema(colInfoLst), reduceSideGB1);
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
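OpAttr, the carrier type threaded through all of these snippets, bundles the generated operator with its table alias and virtual-column positions. A hedged reconstruction inferred from the calls above (the constructor, .inputs.get(0), and .clone(op)); field names and modifiers may differ from Hive's actual inner class:

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.Operator;

// Inferred shape only; see HiveOpConverter.OpAttr in Hive for the real definition.
final class OpAttr {
  final String tabAlias;               // alias of the derived table, "" when none
  final Set<Integer> vcolsInCalcite;   // Calcite positions of virtual columns
  final List<Operator<?>> inputs;      // tip(s) of the operator chain produced so far

  OpAttr(String tabAlias, Set<Integer> vcolsInCalcite, Operator<?>... inputs) {
    this.tabAlias = tabAlias;
    this.vcolsInCalcite = vcolsInCalcite;
    this.inputs = Arrays.asList(inputs);
  }

  // Same attributes, new operator tip; used as inputOpAf.clone(resultOp) above
  OpAttr clone(Operator<?>... newInputs) {
    return new OpAttr(tabAlias, vcolsInCalcite, newInputs);
  }
}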
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genMapSideRS.
private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<String> outputKeyColumnNames = new ArrayList<String>();
  List<String> outputValueColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  String outputColName;
  // 1. Add GB keys to reduce keys
  ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    // gbInfo already has ExprNodes for the GB keys
    reduceKeys.add(gbInfo.gbKeys.get(i));
    String colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    outputKeyColumnNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName,
        gbInfo.gbKeyTypes.get(i), "", false));
    colExprMap.put(colOutputName, gbInfo.gbKeys.get(i));
  }
  // Note: GROUPING SETS are not allowed when map-side aggregation is set to
  // false, so we don't have to worry about them here
  int keyLength = reduceKeys.size();
  // 2. Add distinct UDAF args to reduce keys
  if (gbInfo.containsDistinctAggr) {
    // TODO: Why is this needed (doesn't represent any cols)
    String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size());
    outputKeyColumnNames.add(udafName);
    for (int i = 0; i < gbInfo.distExprNodes.size(); i++) {
      reduceKeys.add(gbInfo.distExprNodes.get(i));
      // This part of reduceKeys is later used to create column names for
      // non-distinct aggregates whose parameters are the same as the distinct
      // keys; that logic expects _col0 at the end, so we always append _col0
      // instead of _col<i>
      outputColName = SemanticAnalyzer.getColumnInternalName(0);
      String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." + outputColName;
      ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, false);
      colInfoLst.add(colInfo);
      colExprMap.put(field, gbInfo.distExprNodes.get(i));
    }
  }
  // 3. Add deduplicated UDAF args to reduce values
  ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbInfo.deDupedNonDistIrefs.size(); i++) {
    reduceValues.add(gbInfo.deDupedNonDistIrefs.get(i));
    outputColName = SemanticAnalyzer.getColumnInternalName(reduceValues.size() - 1);
    outputValueColumnNames.add(outputColName);
    String field = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
    colInfoLst.add(new ColumnInfo(field, reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null, false));
    colExprMap.put(field, reduceValues.get(reduceValues.size() - 1));
  }
  // 4. Gen RS
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
          outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo),
          getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder),
      new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
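The "KEY."/"VALUE." prefixes and "_col<i>" names above follow Hive's internal naming scheme: Utilities.ReduceField enumerates the sections of a reduce-side record, and SemanticAnalyzer.getColumnInternalName(i) yields "_col" + i. A small illustration of the field names step 2 produces, assuming a single GB key so the distinct union key lands at _col1 (the demo class is ours):

import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceFieldNameDemo {
  public static void main(String[] args) {
    String gbKey = Utilities.ReduceField.KEY + "." + "_col0";                  // "KEY._col0"
    // Distinct UDAF arg i under the shared union key column (udafName _col1, i = 0):
    String dist = Utilities.ReduceField.KEY + "." + "_col1" + ":0." + "_col0"; // "KEY._col1:0._col0"
    String val = Utilities.ReduceField.VALUE + "." + "_col0";                  // "VALUE._col0"
    System.out.println(gbKey + " | " + dist + " | " + val);
  }
}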
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method translateGB.
static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) throws SemanticException {
  OpAttr translatedGBOpAttr = null;
  GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc);
  switch (gbInfo.gbPhysicalPipelineMode) {
    case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
      translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo);
      break;
    case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
      translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo);
      break;
    case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
      translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo);
      break;
    case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
      translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo);
      break;
    case NO_MAP_SIDE_GB_NO_SKEW:
      translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo);
      break;
    case NO_MAP_SIDE_GB_SKEW:
      translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo);
      break;
  }
  return translatedGBOpAttr;
}
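The six pipeline modes correspond to whether aggregation starts on the map side, whether the data is skewed, and whether an extra MR job is inserted. An illustrative decision table only: the real selection happens in HiveGBOpConvUtil.getGBInfo and also weighs grouping sets, distinct aggregates, and job-count heuristics; the flag names in comments are the standard HiveConf settings, everything else is our sketch:

public class GBPipelineModeSketch {
  static String pickMode(boolean mapSideAggr /* hive.map.aggr */,
      boolean skew /* hive.groupby.skewindata */,
      boolean gbKeysOrDistinctPresent, boolean needsExtraMRJob) {
    if (mapSideAggr) {
      if (!skew) {
        return needsExtraMRJob ? "MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB"
            : "MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB";
      }
      return gbKeysOrDistinctPresent ? "MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT"
          : "MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT";
    }
    return skew ? "NO_MAP_SIDE_GB_SKEW" : "NO_MAP_SIDE_GB_NO_SKEW";
  }
}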