use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGBRS.
private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<String> outputKeyColumnNames = new ArrayList<String>();
  List<String> outputValueColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);

  ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1,
      outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
  int keyLength = reduceKeys.size();

  if (inclGrpSetInMapSide(gbInfo)) {
    addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys,
        outputKeyColumnNames, colInfoLst, colExprMap);
    keyLength++;
  }

  if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
    // NOTE: All dist cols have a single output col name.
    reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys().size() - 1,
        outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
  } else if (!gbInfo.distColIndices.isEmpty()) {
    // This is the case where the distinct cols are already part of the GB keys;
    // we still need to add a name for them to the output column names.
    outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
  }

  ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(),
      outputValueColumnNames, colInfoLst, colExprMap, false, false);

  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
          outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo),
          getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder),
      new RowSchema(colInfoLst), mapGB);
  rsOp.setColumnExprMap(colExprMap);

  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
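Across the usages collected here the shape is the same: the helper or visitor receives an OpAttr, takes the parent Hive operator out of its inputs list, builds a new operator as a child of that parent via OperatorFactory.getAndMakeChild, attaches the column-to-expression map, and returns a fresh OpAttr wrapping the new operator. The sketch below models that contract only; "Op" and "Attr" are simplified stand-in types invented for this illustration, not Hive's actual Operator<?> and HiveOpConverter.OpAttr.

// A minimal, self-contained sketch of the unwrap/build/re-wrap contract, under the
// assumption that an OpAttr-like value is just (tabAlias, vcolsInCalcite, inputs).
import java.util.Collections;
import java.util.List;
import java.util.Set;

public class OpAttrPatternSketch {

  // Stand-in for Hive's Operator<?>: each operator only remembers its parent here.
  static class Op {
    final Op parent;
    Op(Op parent) { this.parent = parent; }
  }

  // Mirrors the OpAttr shape used in the snippets: (tabAlias, vcolsInCalcite, inputs).
  record Attr(String tabAlias, Set<Integer> vcolsInCalcite, List<Op> inputs) { }

  static Attr convert(Attr inputOpAf) {
    Op parent = inputOpAf.inputs().get(0);   // 1. unwrap the parent operator
    Op child = new Op(parent);               // 2. build the new operator as its child
    return new Attr("",                      // 3. re-wrap: derived alias ("" in the GB helpers),
        Collections.emptySet(),              //    virtual Calcite column positions,
        List.of(child));                     //    and the freshly created operator
  }

  public static void main(String[] args) {
    Attr in = new Attr("t", Collections.emptySet(), List.of(new Op(null)));
    System.out.println(convert(in).inputs().size()); // prints 1
  }
}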
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB1NoMapGB.
/**
 * RS-GB0: builds the reduce-side GroupByOperator for the pipeline variant that has no map-side GB.
 *
 * @param inputOpAf OpAttr wrapping the parent ReduceSinkOperator
 * @param gbInfo group-by information for the aggregate being translated
 * @param gbMode the GroupByDesc.Mode for the generated operator
 * @return OpAttr wrapping the generated GroupByOperator
 * @throws SemanticException
 */
private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, GroupByDesc.Mode gbMode)
    throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW);

  // 1. Build GB keys, grouping set starting position
  // 1.1 First add the original GB keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, true, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }

  // 2. Walk through the UDAFs and add them to the GB
  String lastReduceKeyColName = null;
  if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
    lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames()
        .get(rs.getConf().getOutputKeyColumnNames().size() - 1);
  }
  int numDistinctUDFs = 0;
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();

  // The positions in rsColInfoLst are laid out as
  //   --grpkey--, --distkey--, --values--
  // but a distinct UDAF may appear before/after a non-distinct UDAF, i.e. their
  // positions can be mixed. So for each UDAF parameter we first check whether it is
  // a group-by key; if not, whether it is a distinct key; otherwise it must be a value column.
  Map<Integer, List<ExprNodeDesc>> indexToParameter = new TreeMap<>();
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    ColumnInfo rsUDAFParamColInfo;
    ExprNodeDesc udafParam;
    ExprNodeDesc constantPropDistinctUDAFParam;
    for (int j = 0; j < udafAttr.udafParams.size(); j++) {
      int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
      rsUDAFParamColInfo = rsColInfoLst.get(argPos);
      String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
      if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
        rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs
            + "." + SemanticAnalyzer.getColumnInternalName(j);
      }
      udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName,
          rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol());
      constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(
          rsUDAFParamColInfo.getInternalName(), reduceValues);
      if (constantPropDistinctUDAFParam != null) {
        udafParam = constantPropDistinctUDAFParam;
      }
      aggParameters.add(udafParam);
    }
    indexToParameter.put(i, aggParameters);
    if (udafAttr.isDistinctUDAF) {
      numDistinctUDFs++;
    }
  }

  for (Map.Entry<Integer, List<ExprNodeDesc>> e : indexToParameter.entrySet()) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(e.getKey());
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, e.getValue());
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator,
        udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode));
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + e.getKey());
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    }
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    outputColNames.add(colOutputName);
  }

  Operator rsGB1 = OperatorFactory.getAndMakeChild(
      new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage,
          gbInfo.minReductionHashAggrLowerBound, gbInfo.memoryThreshold, gbInfo.minReductionHashAggr, null,
          false, -1, numDistinctUDFs > 0),
      new RowSchema(colInfoLst), rs);
  rsGB1.setColumnExprMap(colExprMap);

  return new OpAttr("", new HashSet<Integer>(), rsGB1);
}
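The --grpkey--, --distkey--, --values-- layout described in the comment above is why a distinct UDAF parameter gets a synthesized reduce-side name of the form KEY.&lt;lastReduceKeyColName&gt;:&lt;numDistinctUDFs&gt;.&lt;paramCol&gt;. The snippet below is a tiny, self-contained reconstruction of just that string concatenation; the concrete values (_col1, 0, _col0) are assumptions chosen to illustrate the shape, not values taken from any particular query.

// Self-contained illustration of the distinct-parameter naming used in
// genReduceSideGB1NoMapGB; all concrete values below are assumptions for the example.
public class DistinctKeyNameSketch {
  public static void main(String[] args) {
    String reduceField = "KEY";            // Utilities.ReduceField.KEY.name()
    String lastReduceKeyColName = "_col1"; // last entry of the RS output key column names (assumed)
    int numDistinctUDFs = 0;               // running count of distinct UDAFs seen so far
    String paramInternalName = "_col0";    // SemanticAnalyzer.getColumnInternalName(j) for j = 0
    String rsUDAFParamName =
        reduceField + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + paramInternalName;
    System.out.println(rsUDAFParamName);   // prints KEY._col1:0._col0
  }
}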
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveSortExchangeVisitor method visit.
@Override
OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
  OpAttr inputOpAf = hiveOpConverter.dispatch(exchangeRel.getInput());
  String tabAlias = inputOpAf.tabAlias;
  if (tabAlias == null || tabAlias.length() == 0) {
    tabAlias = hiveOpConverter.getHiveDerivedTableAlias();
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName()
        + " with row type: [" + exchangeRel.getRowType() + "]");
  }

  RelDistribution distribution = exchangeRel.getDistribution();
  if (distribution.getType() != Type.HASH_DISTRIBUTED) {
    throw new SemanticException("Only hash distribution supported for LogicalExchange");
  }

  ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getKeys().size()];
  for (int index = 0; index < exchangeRel.getKeys().size(); index++) {
    expressions[index] = HiveOpConverterUtils.convertToExprNode(exchangeRel.getKeys().get(index),
        exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf.vcolsInCalcite);
  }
  exchangeRel.setKeyExpressions(expressions);

  ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1,
      Operation.NOT_ACID, hiveOpConverter.getHiveConf());

  return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveTableFunctionScanVisitor method visit.
@Override
OpAttr visit(HiveTableFunctionScan scanRel) throws SemanticException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName()
        + " with row type: [" + scanRel.getRowType() + "]");
  }

  RexCall call = (RexCall) scanRel.getCall();

  RowResolver rowResolver = new RowResolver();
  List<String> fieldNames = new ArrayList<>(scanRel.getRowType().getFieldNames());
  List<String> functionFieldNames = new ArrayList<>();
  List<ExprNodeDesc> exprCols = new ArrayList<>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
  for (int pos = 0; pos < call.getOperands().size(); pos++) {
    ExprNodeConverter converter = new ExprNodeConverter(SemanticAnalyzer.DUMMY_TABLE, fieldNames.get(pos),
        scanRel.getRowType(), scanRel.getRowType(),
        ((HiveTableScan) scanRel.getInput(0)).getPartOrVirtualCols(), scanRel.getCluster().getTypeFactory(),
        true);
    ExprNodeDesc exprCol = call.getOperands().get(pos).accept(converter);
    colExprMap.put(HiveConf.getColumnInternalName(pos), exprCol);
    exprCols.add(exprCol);

    ColumnInfo columnInfo = new ColumnInfo(HiveConf.getColumnInternalName(pos),
        exprCol.getWritableObjectInspector(), SemanticAnalyzer.DUMMY_TABLE, false);
    rowResolver.put(columnInfo.getTabAlias(), columnInfo.getAlias(), columnInfo);

    functionFieldNames.add(HiveConf.getColumnInternalName(pos));
  }

  OpAttr inputOpAf = hiveOpConverter.dispatch(scanRel.getInputs().get(0));
  TableScanOperator op = (TableScanOperator) inputOpAf.inputs.get(0);
  op.getConf().setRowLimit(1);

  Operator<?> output = OperatorFactory.getAndMakeChild(new SelectDesc(exprCols, functionFieldNames, false),
      new RowSchema(rowResolver.getRowSchema()), op);
  output.setColumnExprMap(colExprMap);

  Operator<?> funcOp = genUDTFPlan(call, functionFieldNames, output, rowResolver);

  return new OpAttr(null, new HashSet<Integer>(), funcOp);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveUnionVisitor method visit.
@Override
OpAttr visit(HiveUnion unionRel) throws SemanticException {
  // 1. Convert inputs
  List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
  OpAttr[] inputs = new OpAttr[inputsList.size()];
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = hiveOpConverter.dispatch(inputsList.get(i));
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName()
        + " with row type: [" + unionRel.getRowType() + "]");
  }

  // 2. Create a new union operator
  UnionDesc unionDesc = new UnionDesc();
  unionDesc.setNumInputs(inputs.length);
  String tableAlias = hiveOpConverter.getHiveDerivedTableAlias();
  ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
  Operator<?>[] children = new Operator<?>[inputs.length];
  for (int i = 0; i < children.length; i++) {
    if (i == 0) {
      children[i] = inputs[i].inputs.get(0);
    } else {
      Operator<?> op = inputs[i].inputs.get(0);
      // We need to check whether the other input branches of the union follow the schema
      // of the first branch; we may need to cast the data types of specific columns.
      children[i] = genInputSelectForUnion(op, cinfoLst);
    }
  }
  Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(
      hiveOpConverter.getSemanticAnalyzer().getOpContext(), unionDesc, new RowSchema(cinfoLst), children);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
  }

  // 3. Return result
  return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
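The extra SELECT that genInputSelectForUnion may place on a branch only matters when that branch's column types do not line up with cinfoLst from the first branch. Below is a rough, self-contained illustration of that per-column check; the column types are invented for the example, and the cast target (the first branch's type) is an assumption based on the comment above, not a reading of genInputSelectForUnion itself.

// Hypothetical illustration: compare a later union branch against the first branch's
// column types (cinfoLst) and report which columns would need a cast-only SELECT.
import java.util.List;

public class UnionBranchTypeCheckSketch {
  public static void main(String[] args) {
    List<String> firstBranchTypes = List.of("int", "string");    // types of branch 0 (assumed)
    List<String> laterBranchTypes = List.of("bigint", "string"); // types of a later branch (assumed)
    for (int c = 0; c < firstBranchTypes.size(); c++) {
      if (!firstBranchTypes.get(c).equals(laterBranchTypes.get(c))) {
        // a SELECT with e.g. CAST(_col0 AS int) would sit between this branch and the
        // UnionOperator so that all branches present the same row schema
        System.out.println("column " + c + " needs a cast to " + firstBranchTypes.get(c));
      }
    }
  }
}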