use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class TypeCheckProcFactory, method processGByExpr.
/**
* Function to do groupby subexpression elimination. This is called by all the
* processors initially. As an example, consider the query select a+b,
* count(1) from T group by a+b; Then a+b is already precomputed in the group
* by operator's key, so we substitute a+b in the select list with the internal
* column name of the a+b expression that appears in the input row resolver.
*
* @param nd
* The node that is being inspected.
* @param procCtx
* The processor context.
*
* @return exprNodeColumnDesc.
*/
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
// We recursively create the exprNodeDesc. Base cases: when we encounter
// a column ref, we convert that into an exprNodeColumnDesc; when we encounter
// a constant, we convert that into an exprNodeConstantDesc. For others we
// just build the exprNodeFuncDesc with recursively built children.
ASTNode expr = (ASTNode) nd;
TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
// having key in (select .. where a = min(b.value)
if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
return null;
}
RowResolver input = ctx.getInputRR();
ExprNodeDesc desc = null;
if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
return null;
}
// If the current subExpression is pre-calculated, as in Group-By etc.
ColumnInfo colInfo = input.getExpression(expr);
// try outer row resolver
RowResolver outerRR = ctx.getOuterRR();
if (colInfo == null && outerRR != null) {
colInfo = outerRR.getExpression(expr);
}
if (colInfo != null) {
desc = new ExprNodeColumnDesc(colInfo);
ASTNode source = input.getExpressionSource(expr);
if (source != null) {
ctx.getUnparseTranslator().addCopyTranslation(expr, source);
}
return desc;
}
return desc;
}
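The substitution is easiest to see with the query from the javadoc above. A minimal sketch, assuming a hypothetical ASTNode aPlusBAst for the a+b expression and an internal column name of "_col0" (both are illustrative, not the planner's exact values):

RowResolver gbyOutputRR = new RowResolver();
gbyOutputRR.setIsExprResolver(true);
// The group-by operator registers the a+b expression against its output column.
gbyOutputRR.putExpression(aPlusBAst, new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "", false));
// When the select list is type-checked, processGByExpr finds the precomputed
// expression and returns a column reference instead of rebuilding the + call.
ColumnInfo precomputed = gbyOutputRR.getExpression(aPlusBAst);
ExprNodeDesc selectExpr = new ExprNodeColumnDesc(precomputed); // refers to "_col0"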
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class SemanticAnalyzer, method genReduceSinkPlan.
private Operator genReduceSinkPlan(String dest, QB qb, Operator<?> input, int numReducers, boolean hasOrderBy) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// First generate the expression for the partition and sort keys
// The cluster by clause / distribute by clause has the aliases for
// partition function
ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest);
if (partitionExprs == null) {
partitionExprs = qb.getParseInfo().getDistributeByForClause(dest);
}
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
if (partitionExprs != null) {
int ccount = partitionExprs.getChildCount();
for (int i = 0; i < ccount; ++i) {
ASTNode cl = (ASTNode) partitionExprs.getChild(i);
partCols.add(genExprNodeDesc(cl, inputRR));
}
}
ASTNode sortExprs = qb.getParseInfo().getClusterByForClause(dest);
if (sortExprs == null) {
sortExprs = qb.getParseInfo().getSortByForClause(dest);
}
if (sortExprs == null) {
sortExprs = qb.getParseInfo().getOrderByForClause(dest);
if (sortExprs != null) {
assert numReducers == 1;
// in strict mode, in the presence of order by, limit must be specified
if (qb.getParseInfo().getDestLimit(dest) == null) {
String error = StrictChecks.checkNoLimit(conf);
if (error != null) {
throw new SemanticException(generateErrorMessage(sortExprs, error));
}
}
}
}
ArrayList<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
StringBuilder order = new StringBuilder();
StringBuilder nullOrder = new StringBuilder();
if (sortExprs != null) {
int ccount = sortExprs.getChildCount();
for (int i = 0; i < ccount; ++i) {
ASTNode cl = (ASTNode) sortExprs.getChild(i);
if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
// SortBy ASC
order.append("+");
cl = (ASTNode) cl.getChild(0);
if (cl.getType() == HiveParser.TOK_NULLS_FIRST) {
nullOrder.append("a");
} else if (cl.getType() == HiveParser.TOK_NULLS_LAST) {
nullOrder.append("z");
} else {
throw new SemanticException("Unexpected null ordering option: " + cl.getType());
}
cl = (ASTNode) cl.getChild(0);
} else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) {
// SortBy DESC
order.append("-");
cl = (ASTNode) cl.getChild(0);
if (cl.getType() == HiveParser.TOK_NULLS_FIRST) {
nullOrder.append("a");
} else if (cl.getType() == HiveParser.TOK_NULLS_LAST) {
nullOrder.append("z");
} else {
throw new SemanticException("Unexpected null ordering option: " + cl.getType());
}
cl = (ASTNode) cl.getChild(0);
} else {
// ClusterBy
order.append("+");
nullOrder.append("a");
}
ExprNodeDesc exprNode = genExprNodeDesc(cl, inputRR);
sortCols.add(exprNode);
}
}
Operator result = genReduceSinkPlan(input, partCols, sortCols, order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, true);
if (result.getParentOperators().size() == 1 && result.getParentOperators().get(0) instanceof ReduceSinkOperator) {
((ReduceSinkOperator) result.getParentOperators().get(0)).getConf().setHasOrderBy(hasOrderBy);
}
return result;
}
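The order and nullOrder builders encode one character per sort key: '+' or '-' for ascending/descending and 'a' or 'z' for nulls-first/nulls-last. A small sketch of that encoding for a hypothetical SORT BY c1 ASC NULLS LAST, c2 DESC NULLS FIRST clause:

StringBuilder order = new StringBuilder();
StringBuilder nullOrder = new StringBuilder();
// c1 ASC NULLS LAST
order.append("+");
nullOrder.append("z");
// c2 DESC NULLS FIRST
order.append("-");
nullOrder.append("a");
// order.toString() -> "+-", nullOrder.toString() -> "za"; both strings are
// handed to genReduceSinkPlan(input, partCols, sortCols, order.toString(),
// nullOrder.toString(), numReducers, Operation.NOT_ACID, true).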
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class SemanticAnalyzer, method genGroupByPlanGroupByOperator2MR.
/**
* Generate the second GroupByOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
* aggregation based on the partial aggregation results.
*
* @param mode
* the mode of aggregation (FINAL)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @return the new GroupByOperator
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
RowResolver groupByOutputRowResolver2 = new RowResolver();
groupByOutputRowResolver2.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
ArrayList<String> outputColumnNames = new ArrayList<String>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
String expression = exprInfo.getInternalName();
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
}
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
ASTNode value = entry.getValue();
ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
op.setColumnExprMap(colExprMap);
return op;
}
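This operator only merges partial results: the first group-by has already produced aggregation buffers, and mode FINAL turns them into final values. A rough sketch of that two-phase contract at the GenericUDAFEvaluator level, using count(1) and a hypothetical countEval evaluator (init(Mode...) calls against the proper ObjectInspectors are omitted for brevity):

// Phase 1 (first GroupByOperator, partial mode): iterate raw rows into a buffer.
GenericUDAFEvaluator.AggregationBuffer partial = countEval.getNewAggregationBuffer();
countEval.iterate(partial, new Object[] { 1 });
countEval.iterate(partial, new Object[] { 1 });
Object partialResult = countEval.terminatePartial(partial); // partial count of 2
// Phase 2 (this operator, GroupByDesc.Mode.FINAL): merge partials, then terminate.
GenericUDAFEvaluator.AggregationBuffer finalBuf = countEval.getNewAggregationBuffer();
countEval.merge(finalBuf, partialResult);
Object finalCount = countEval.terminate(finalBuf);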
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class ReduceSinkOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
try {
numRows = 0;
cntr = 1;
logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
statsMap.put(getCounterName(Counter.RECORDS_OUT_INTERMEDIATE, hconf), recordCounter);
List<ExprNodeDesc> keys = conf.getKeyCols();
if (isLogDebugEnabled) {
LOG.debug("keys size is " + keys.size());
for (ExprNodeDesc k : keys) {
LOG.debug("Key exprNodeDesc " + k.getExprString());
}
}
keyEval = new ExprNodeEvaluator[keys.size()];
int i = 0;
for (ExprNodeDesc e : keys) {
if (e instanceof ExprNodeConstantDesc && (BUCKET_NUMBER_COL_NAME).equals(((ExprNodeConstantDesc) e).getValue())) {
buckColIdxInKeyForSdpo = i;
}
keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
}
numDistributionKeys = conf.getNumDistributionKeys();
distinctColIndices = conf.getDistinctColumnIndices();
numDistinctExprs = distinctColIndices.size();
valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
i = 0;
for (ExprNodeDesc e : conf.getValueCols()) {
valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
}
partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
i = 0;
for (ExprNodeDesc e : conf.getPartitionCols()) {
int index = ExprNodeDescUtils.indexOf(e, keys);
partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
}
if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];
i = 0;
for (ExprNodeDesc e : conf.getBucketCols()) {
int index = ExprNodeDescUtils.indexOf(e, keys);
bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
}
buckColIdxInKey = conf.getPartitionCols().size();
}
tag = conf.getTag();
tagByte[0] = (byte) tag;
skipTag = conf.getSkipTag();
if (isLogInfoEnabled) {
LOG.info("Using tag = " + tag);
}
TableDesc keyTableDesc = conf.getKeySerializeInfo();
keySerializer = (Serializer) keyTableDesc.getDeserializerClass().newInstance();
keySerializer.initialize(null, keyTableDesc.getProperties());
keyIsText = keySerializer.getSerializedClass().equals(Text.class);
TableDesc valueTableDesc = conf.getValueSerializeInfo();
valueSerializer = (Serializer) valueTableDesc.getDeserializerClass().newInstance();
valueSerializer.initialize(null, valueTableDesc.getProperties());
int limit = conf.getTopN();
float memUsage = conf.getTopNMemoryUsage();
if (limit >= 0 && memUsage > 0) {
reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : new TopNHash();
reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
}
useUniformHash = conf.getReducerTraits().contains(UNIFORM);
firstRow = true;
} catch (Exception e) {
String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
LOG.error(msg, e);
throw new RuntimeException(e);
}
}
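Every column list on the ReduceSinkDesc (keys, values, partition and bucket columns) is a list of ExprNodeDesc that gets wrapped in an ExprNodeEvaluator, exactly as keyEval is built above. A minimal sketch for a single string column, where the column name "key", rowInspector, and row are assumptions:

ExprNodeDesc keyCol = new ExprNodeColumnDesc(
    TypeInfoFactory.stringTypeInfo, "key", null /* tabAlias */, false /* isVirtualCol */);
// Same factory call initializeOp uses to build keyEval[i].
ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(keyCol);
// rowInspector comes from the parent operator's output row schema (assumed here).
ObjectInspector keyOI = eval.initialize(rowInspector);
Object keyValue = eval.evaluate(row); // pulls the "key" field out of the row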
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class DynamicPartitionPruner, method initialize.
private void initialize() throws SerDeException {
this.clear();
Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
// sources represent vertex names
Set<String> sources = work.getEventSourceTableDescMap().keySet();
sourcesWaitingForEvents.addAll(sources);
for (String s : sources) {
// Set to 0 to start with. This will be decremented for all columns for which events
// are generated by this source - which is eventually used to determine the number of expected
// events for the source: #columns X #tasks
numExpectedEventsPerSource.put(s, new MutableInt(0));
numEventsSeenPerSource.put(s, new MutableInt(0));
// Virtual relation generated by the reduce sink
List<TableDesc> tables = work.getEventSourceTableDescMap().get(s);
// Real column name - on which the operation is being performed
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Column type
List<String> columnTypes = work.getEventSourceColumnTypeMap().get(s);
// Expression for the operation. e.g. N^2 > 10
List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(s);
// eventSourceTableDesc, eventSourceColumnName, eventSourcePartKeyExpr move in lock-step.
// One entry is added to each at the same time
Iterator<String> cit = columnNames.iterator();
Iterator<String> typit = columnTypes.iterator();
Iterator<ExprNodeDesc> pit = partKeyExprs.iterator();
// A single source can process multiple columns, and will send an event for each of them.
for (TableDesc t : tables) {
numExpectedEventsPerSource.get(s).decrement();
++sourceInfoCount;
String columnName = cit.next();
String columnType = typit.next();
ExprNodeDesc partKeyExpr = pit.next();
SourceInfo si = createSourceInfo(t, partKeyExpr, columnName, columnType, jobConf);
if (!sourceInfoMap.containsKey(s)) {
sourceInfoMap.put(s, new ArrayList<SourceInfo>());
}
List<SourceInfo> sis = sourceInfoMap.get(s);
sis.add(si);
// If multiple sources restrict the same column, share the value set so pruning
// operates on the union of the values in that case.
if (columnMap.containsKey(columnName)) {
// All Sources are initialized up front. Events from different sources will end up getting added to the same list.
// Pruning is disabled if either source sends in an event which causes pruning to be skipped
si.values = columnMap.get(columnName).values;
si.skipPruning = columnMap.get(columnName).skipPruning;
}
columnMap.put(columnName, si);
}
}
}
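The per-source counter starts at zero and is decremented once for every (table, column) pair a source prunes; combined with the source's task count it gives the number of expected events, per the "#columns X #tasks" comment. A small sketch of that bookkeeping, where the task count and the final multiplication are assumptions for illustration:

MutableInt perSource = new MutableInt(0);
// initialize() decrements once per pruned column handled by the source.
perSource.decrement(); // e.g. partition column "ds"
perSource.decrement(); // e.g. partition column "hr"
int columnsForSource = -perSource.intValue(); // 2
int tasksForSource = 4; // hypothetical number of upstream tasks
int expectedEvents = columnsForSource * tasksForSource; // 8 events expected in total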