Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
From the class HiveOpConverter, method visit.
OpAttr visit(HiveUnion unionRel) throws SemanticException {
  // 1. Convert inputs
  List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
  OpAttr[] inputs = new OpAttr[inputsList.size()];
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = dispatch(inputsList.get(i));
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + " with row type: [" + unionRel.getRowType() + "]");
  }
  // 2. Create a new union operator
  UnionDesc unionDesc = new UnionDesc();
  unionDesc.setNumInputs(inputs.length);
  String tableAlias = getHiveDerivedTableAlias();
  ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
  Operator<?>[] children = new Operator<?>[inputs.length];
  for (int i = 0; i < children.length; i++) {
    if (i == 0) {
      children[i] = inputs[i].inputs.get(0);
    } else {
      Operator<?> op = inputs[i].inputs.get(0);
      // We need to check whether the other input branches of the union follow the schema of the first branch;
      // we may need to cast the data types of specific columns.
      children[i] = genInputSelectForUnion(op, cinfoLst);
    }
  }
  Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
  }
  // 3. Return result
  return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
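As the comment inside the loop notes, every union branch after the first may need an input SELECT that aligns its schema with the first branch. Below is a minimal sketch of the type-alignment check that motivates genInputSelectForUnion, assuming the usual Hive and java.util imports; the helper name columnsNeedingCast is made up for illustration, and the real genInputSelectForUnion builds a SelectOperator with conversion expressions rather than just reporting mismatches.

// Simplified illustration only: compare a branch's output schema against the
// column infos derived from the first branch and report which columns would
// require a cast.
private static List<Integer> columnsNeedingCast(Operator<?> branchOp, List<ColumnInfo> targetColInfos) {
  List<ColumnInfo> branchColInfos = branchOp.getSchema().getSignature();
  List<Integer> mismatches = new ArrayList<Integer>();
  for (int i = 0; i < targetColInfos.size(); i++) {
    TypeInfo expected = targetColInfos.get(i).getType();
    TypeInfo actual = branchColInfos.get(i).getType();
    if (!expected.equals(actual)) {
      // this column would need a cast in the SELECT inserted on top of the branch
      mismatches.add(i);
    }
  }
  return mismatches;
}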
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
From the class HiveOpConverter, method convert.
public Operator convert(RelNode root) throws SemanticException {
  OpAttr opAf = dispatch(root);
  Operator rootOp = opAf.inputs.get(0);
  handleTopLimit(rootOp);
  return rootOp;
}
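The OpAttr values passed between dispatch and the visit methods are used here only through their constructor and the inputs field. Inferring purely from that usage, a simplified stand-in for the holder could look like the sketch below, assuming the usual java.util imports; the name OpAttrSketch and the exact field types are assumptions, and the real class in HiveOpConverter may carry additional state.

// Simplified stand-in inferred from usage in this listing; not the actual Hive class.
class OpAttrSketch {
  final String tableAlias;                              // alias of the derived table produced so far
  final Set<Integer> vcolsInCalcite;                    // virtual-column positions tracked on the Calcite side
  final List<Operator<? extends OperatorDesc>> inputs;  // generated operator(s); inputs.get(0) is the current root

  OpAttrSketch(String tableAlias, Set<Integer> vcolsInCalcite, Operator<? extends OperatorDesc>... ops) {
    this.tableAlias = tableAlias;
    this.vcolsInCalcite = vcolsInCalcite;
    this.inputs = Arrays.asList(ops);
  }
}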
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
From the class HiveGBOpConvUtil, method genReduceSideGB1.
private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet, boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB);
  // 1. Build GB Keys, grouping set starting position
  // 1.1 First Add original GB Keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 1.2 Add GrpSet Col
  int groupingSetsColPosition = -1;
  if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) {
    groupingSetsColPosition = gbInfo.gbKeys.size();
    if (computeGrpSet) {
      // GrpSet Col needs to be constructed
      gbKeys.add(new ExprNodeConstantDesc("0"));
    } else {
      // GrpSet Col already part of input RS
      // TODO: Can't we just copy the ExprNodeDesc from the input? (Do we need to
      // explicitly set the table alias to null & VC to false?)
      gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition, groupingSetsColPosition, false, true));
    }
    colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition);
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
    colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition));
  }
  // 2. Walk through UDAF and add them to GB
  String lastReduceKeyColName = null;
  if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
    lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(rs.getConf().getOutputKeyColumnNames().size() - 1);
  }
  int numDistinctUDFs = 0;
  int distinctStartPosInReduceKeys = gbKeys.size();
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2 : gbInfo.gbKeys.size();
  int udafColStartPosInRS = rs.getConf().getKeyCols().size();
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    if (udafAttr.isDistinctUDAF) {
      ColumnInfo rsDistUDAFParamColInfo;
      ExprNodeDesc distinctUDAFParam;
      ExprNodeDesc constantPropDistinctUDAFParam;
      for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) {
        rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j);
        String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName();
        // TODO: verify if this is needed
        if (lastReduceKeyColName != null) {
          rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
        }
        distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(), rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(), rsDistUDAFParamColInfo.getIsVirtualCol());
        if (propagateConstInDistinctUDAF) {
          // TODO: Implement propConstDistUDAFParams
          constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsDistUDAFParamColInfo.getInternalName(), reduceValues);
          if (constantPropDistinctUDAFParam != null) {
            distinctUDAFParam = constantPropDistinctUDAFParam;
          }
        }
        aggParameters.add(distinctUDAFParam);
      }
      numDistinctUDFs++;
    } else {
      aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i)));
    }
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode));
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    }
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    outputColNames.add(colOutputName);
  }
  // Nothing special needs to be done for grouping sets if this is the final
  // group by operator and multiple rows corresponding to the grouping sets
  // have been generated upstream.
  // However, if an additional MR job has been created to handle grouping sets,
  // additional rows corresponding to grouping sets need to be created here.
  // TODO: Clean up/refactor assumptions
  boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) && !finalGB && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT);
  Operator rsGBOp = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.grpSets, includeGrpSetInGBDesc, groupingSetsColPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
  rsGBOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsGBOp);
}
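For the non-final group by, output column names come from SemanticAnalyzer.getColumnInternalName. To my knowledge that helper produces Hive's standard _colN internal names; treating that as an assumption, the naming implied by the loops above for a GB with two keys, no grouping sets, and two aggregations would be:

// Assumed behavior of the internal-name helper: position i maps to "_col" + i.
static String columnInternalName(int pos) {
  return "_col" + pos;
}
// With 2 GB keys and 2 aggregations on a non-final GB, the loops above would emit:
//   keys:         _col0, _col1   (getColumnInternalName(i))
//   aggregations: _col2, _col3   (getColumnInternalName(gbKeys.size() + aggregations.size() - 1))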
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
From the class HiveGBOpConvUtil, method genReduceSideGB1NoMapGB.
/**
 * RS-GB0
 *
 * @param inputOpAf
 * @param gbInfo
 * @param gbMode
 * @return
 * @throws SemanticException
 */
private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, GroupByDesc.Mode gbMode) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW);
  // 1. Build GB Keys, grouping set starting position
  // 1.1 First Add original GB Keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, true, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 2. Walk through UDAF and add them to GB
  String lastReduceKeyColName = null;
  if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
    lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(rs.getConf().getOutputKeyColumnNames().size() - 1);
  }
  int numDistinctUDFs = 0;
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();
  // The positions in rsColInfoLst are laid out as
  //   --grpkey--, --distkey--, --values--
  // but a distinct UDAF may come before or after a non-distinct UDAF,
  // i.e., their positions can be mixed.
  // So for each UDAF parameter we first check whether it is a group-by key;
  // if not, whether it is a distinct key; otherwise it must be a value column.
  List<Integer> distinctPositions = new ArrayList<>();
  Map<Integer, ArrayList<ExprNodeDesc>> indexToParameter = new TreeMap<>();
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    ColumnInfo rsUDAFParamColInfo;
    ExprNodeDesc udafParam;
    ExprNodeDesc constantPropDistinctUDAFParam;
    for (int j = 0; j < udafAttr.udafParams.size(); j++) {
      int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
      rsUDAFParamColInfo = rsColInfoLst.get(argPos);
      String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
      if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
        rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
      }
      udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol());
      constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), reduceValues);
      if (constantPropDistinctUDAFParam != null) {
        udafParam = constantPropDistinctUDAFParam;
      }
      aggParameters.add(udafParam);
    }
    indexToParameter.put(i, aggParameters);
    if (udafAttr.isDistinctUDAF) {
      numDistinctUDFs++;
    }
  }
  for (int index : indexToParameter.keySet()) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(index);
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, indexToParameter.get(index));
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode));
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + index);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    }
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    outputColNames.add(colOutputName);
  }
  Operator rsGB1 = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, -1, numDistinctUDFs > 0), new RowSchema(colInfoLst), rs);
  rsGB1.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsGB1);
}
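getColInfoPos (called above but not shown in this snippet) is what resolves the mixed positions the comment describes. Below is a simplified sketch of the lookup order it presumably follows; the three lists stand for the group-by keys, the distinct keys, and the remaining value expressions that make up rsColInfoLst in that order, and none of this is the actual Hive implementation.

// Illustration of the position lookup described in the comment above; a sketch,
// not the real getColInfoPos.
static int colInfoPos(ExprNodeDesc param, List<ExprNodeDesc> gbKeys,
    List<ExprNodeDesc> distKeys, List<ExprNodeDesc> valueExprs) {
  int pos = indexOf(param, gbKeys);
  if (pos >= 0) {
    return pos;                                   // parameter is a group-by key
  }
  pos = indexOf(param, distKeys);
  if (pos >= 0) {
    return gbKeys.size() + pos;                   // parameter is a distinct key
  }
  return gbKeys.size() + distKeys.size() + indexOf(param, valueExprs);  // otherwise a value column
}

static int indexOf(ExprNodeDesc expr, List<ExprNodeDesc> exprs) {
  for (int i = 0; i < exprs.size(); i++) {
    if (exprs.get(i).isSame(expr)) {              // structural comparison of expressions
      return i;
    }
  }
  return -1;
}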
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
From the class HiveOpConverterPostProc, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 0. We check the conditions to apply this transformation;
  // if we do not meet them we bail out
  final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED);
  final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP);
  final boolean cboSucceeded = pctx.getContext().isCboSucceeded();
  if (!(cboEnabled && returnPathEnabled && cboSucceeded)) {
    return pctx;
  }
  // 1. Initialize aux data structures
  this.pctx = pctx;
  this.aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>();
  // 2. Trigger transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), new JoinAnnotate());
  opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%"), new TableScanAnnotate());
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new ForwardWalker(disp);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
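The transformation relies on Hive's standard rule-dispatch machinery: each RuleRegExp maps an operator-name pattern to a NodeProcessor, and the ForwardWalker invokes the matching processor as it walks down from the top operators. A minimal sketch of that contract follows; it is an illustration only, not the actual TableScanAnnotate, and the alias bookkeeping shown is an assumption.

// Sketch of a NodeProcessor for the R2 rule above; not the real TableScanAnnotate.
// It records each visited TableScanOperator under its declared alias so that
// later phases could look operators up by alias.
private class TableScanAnnotateSketch implements NodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator tableScanOp = (TableScanOperator) nd;
    // Hypothetical bookkeeping: remember the operator under its table scan alias.
    aliasToOpInfo.put(tableScanOp.getConf().getAlias(), tableScanOp);
    return null;
  }
}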