Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class GroupByOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  numRowsInput = 0;
  numRowsHashTbl = 0;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  groupingSetsPresent = conf.isGroupingSetsPresent();
  ObjectInspector rowInspector = inputObjInspectors[0];

  // init keyFields
  int numKeys = conf.getKeys().size();
  keyFields = new ExprNodeEvaluator[numKeys];
  keyObjectInspectors = new ObjectInspector[numKeys];
  currentKeyObjectInspectors = new ObjectInspector[numKeys];
  for (int i = 0; i < numKeys; i++) {
    keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
    keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
    currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(
        keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
  }
  // Initialize the constants for the grouping sets, so that they can be
  // re-used for each row.
  if (groupingSetsPresent) {
    groupingSets = conf.getListGroupingSets();
    groupingSetsPosition = conf.getGroupingSetPosition();
    newKeysGroupingSets = new IntWritable[groupingSets.size()];
    groupingSetsBitSet = new FastBitSet[groupingSets.size()];
    int pos = 0;
    for (Integer groupingSet : groupingSets) {
      // Create the mapping corresponding to the grouping set
      newKeysGroupingSets[pos] = new IntWritable(groupingSet);
      groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
      pos++;
    }
  }

  // initialize unionExpr for reduce-side
  // reduce KEY has union field as the last field if there are distinct
  // aggregates in group-by.
  List<? extends StructField> sfs =
      ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
  if (sfs.size() > 0) {
    StructField keyField = sfs.get(0);
    if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
      ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
      if (keyObjInspector instanceof StructObjectInspector) {
        List<? extends StructField> keysfs =
            ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
        if (keysfs.size() > 0) {
          // the last field is the union field, if any
          StructField sf = keysfs.get(keysfs.size() - 1);
          if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
            unionExprEval = ExprNodeEvaluatorFactory.get(
                new ExprNodeColumnDesc(
                    TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                    keyField.getFieldName() + "." + sf.getFieldName(), null, false),
                hconf);
            unionExprEval.initialize(rowInspector);
          }
        }
      }
    }
  }

  // init aggregationParameterFields
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
  aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterObjects = new Object[aggrs.size()][];
  aggregationIsDistinct = new boolean[aggrs.size()];
  for (int i = 0; i < aggrs.size(); i++) {
    AggregationDesc aggr = aggrs.get(i);
    ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
    aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
    aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterObjects[i] = new Object[parameters.size()];
    for (int j = 0; j < parameters.size(); j++) {
      aggregationParameterFields[i][j] =
          ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
      aggregationParameterObjectInspectors[i][j] =
          aggregationParameterFields[i][j].initialize(rowInspector);
      if (unionExprEval != null) {
        String[] names = parameters.get(j).getExprString().split("\\.");
        // parameters of the form: KEY.colx:t.coly
        if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
          String name = names[names.length - 2];
          int tag = Integer.parseInt(name.split("\\:")[1]);
          if (aggr.getDistinct()) {
            // is distinct
            Set<Integer> set = distinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              distinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          } else {
            Set<Integer> set = nonDistinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              nonDistinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          }
        } else {
          // will be KEY._COLx or VALUE._COLx
          if (!nonDistinctAggrs.contains(i)) {
            nonDistinctAggrs.add(i);
          }
        }
      } else {
        if (aggr.getDistinct()) {
          aggregationIsDistinct[i] = true;
        }
      }
      aggregationParameterStandardObjectInspectors[i][j] =
          ObjectInspectorUtils.getStandardObjectInspector(
              aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
      aggregationParameterObjects[i][j] = null;
    }
    if (parameters.size() == 0) {
      // for ex: count(*)
      if (!nonDistinctAggrs.contains(i)) {
        nonDistinctAggrs.add(i);
      }
    }
  }

  // init aggregationClasses
  aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    AggregationDesc agg = conf.getAggregators().get(i);
    aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
      context.setup(genericUDAFEvaluator);
    }
  }

  // grouping id should be pruned, which is the last of key columns
  // see ColumnPrunerGroupByProc
  outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;

  // init objectInspectors
  ObjectInspector[] objectInspectors =
      new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
  for (int i = 0; i < outputKeyLength; i++) {
    objectInspectors[i] = currentKeyObjectInspectors[i];
  }
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(
        conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
  }
  aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
  if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup())
      && (!groupingSetsPresent)) {
    aggregations = newAggregations();
    hashAggr = false;
  } else {
    hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
    aggregations = newAggregations();
    hashAggr = true;
    keyPositionsSize = new ArrayList<Integer>();
    aggrPositions = new List[aggregations.length];
    groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
    // compare every groupbyMapAggrInterval rows
    numRowsCompareHashAggr = groupbyMapAggrInterval;
    minReductionHashAggr =
        HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
  }
  List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
      fieldNames, Arrays.asList(objectInspectors));
  KeyWrapperFactory keyWrapperFactory =
      new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
  newKeys = keyWrapperFactory.getKeyWrapper();
  isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
  numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
  firstRow = true;
  // estimate the number of hash table entries based on the size of each entry;
  // since the size of an entry is not known, estimate it based on the number
  // of entries
  if (hashAggr) {
    computeMaxEntriesHashAggr();
  }
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
  memoryThreshold = this.getConf().getMemoryThreshold();
  LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}",
      isTez, isLlap, numExecutors, maxMemory);
}
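The notable ExprNodeColumnDesc here is the one built for the reduce-side union field: its column name is a dotted path (KEY-field name plus "." plus the union field's name) rather than a plain column. A minimal sketch of that constructor call, with a placeholder TypeInfo and a hypothetical "KEY.union" path, since the real code derives both from the field's ObjectInspector:

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionColumnRefSketch {
  public static void main(String[] args) {
    // Arguments: TypeInfo, column name, table alias (none here), and whether
    // the column is a partition/virtual column. "KEY.union" stands in for
    // keyField.getFieldName() + "." + sf.getFieldName() above.
    ExprNodeColumnDesc unionCol = new ExprNodeColumnDesc(
        TypeInfoFactory.intTypeInfo, "KEY.union", null, false);
    System.out.println(unionCol.getExprString()); // prints the column reference
  }
}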
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class VectorizationContext, method getIdentityExpression.
/**
 * Used as a fast path for operations that don't modify their input, like unary +
 * and casting boolean to long. IdentityExpression and its children are always
 * projections.
 */
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException {
  ExprNodeDesc childExpr = childExprList.get(0);
  int inputCol;
  String colType;
  VectorExpression v1 = null;
  if (childExpr instanceof ExprNodeGenericFuncDesc) {
    v1 = getVectorExpression(childExpr);
    inputCol = v1.getOutputColumn();
    colType = v1.getOutputType();
  } else if (childExpr instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
    inputCol = getInputColumnIndex(colDesc.getColumn());
    colType = colDesc.getTypeString();
  } else {
    throw new HiveException("Expression not supported: " + childExpr);
  }
  VectorExpression expr = new IdentityExpression(inputCol, colType);
  if (v1 != null) {
    expr.setChildExpressions(new VectorExpression[] { v1 });
  }
  return expr;
}
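In the ExprNodeColumnDesc branch the method reduces to two steps: resolve the column name to its index in the vectorized row batch, then wrap that index in an IdentityExpression. A minimal sketch of the result, using the two-argument constructor shown above with made-up values:

import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

// Identity over input column 2 of type "bigint": the output column is simply
// column 2, so downstream expressions read the input column unchanged.
VectorExpression expr = new IdentityExpression(2, "bigint");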
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class ConvertAstToSearchArg, method findVariable.
/**
 * Find the variable in the expression.
 * @param expr the expression to look in
 * @return the index of the variable or -1 if there is not exactly one
 *         variable.
 */
private int findVariable(ExprNodeDesc expr) {
  int result = -1;
  List<ExprNodeDesc> children = expr.getChildren();
  for (int i = 0; i < children.size(); ++i) {
    ExprNodeDesc child = children.get(i);
    if (child instanceof ExprNodeColumnDesc) {
      // if we already found a variable, this isn't a sarg
      if (result != -1) {
        return -1;
      } else {
        result = i;
      }
    }
  }
  return result;
}
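To see what the scan returns, consider the children of a comparison. Below is a self-contained re-implementation of the same logic (the class and method names FindVariableSketch and findSingleColumn are hypothetical), exercised against hand-built expression nodes:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FindVariableSketch {
  // Same contract as findVariable: index of the single column child, else -1.
  static int findSingleColumn(List<ExprNodeDesc> children) {
    int result = -1;
    for (int i = 0; i < children.size(); ++i) {
      if (children.get(i) instanceof ExprNodeColumnDesc) {
        if (result != -1) {
          return -1; // two columns, e.g. x = y: not usable as a sarg leaf
        }
        result = i;
      }
    }
    return result;
  }

  public static void main(String[] args) {
    ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "x", null, false);
    ExprNodeDesc lit = new ExprNodeConstantDesc(10);
    System.out.println(findSingleColumn(Arrays.asList(col, lit))); // 0  (x = 10)
    System.out.println(findSingleColumn(Arrays.asList(col, col))); // -1 (x = x)
  }
}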
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class ConvertAstToSearchArg, method parse.
/**
 * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
 * @param expression the Hive ExprNodeDesc
 */
private void parse(ExprNodeDesc expression) {
  // handle the special cases.
  if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
    // if it is a reference to a boolean column, convert it to a truth test.
    if (expression instanceof ExprNodeColumnDesc) {
      ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression;
      if (columnDesc.getTypeString().equals("boolean")) {
        builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, true);
        return;
      }
    }
    // otherwise, we don't know what to do so make it a maybe
    builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
    return;
  }

  // get the kind of expression
  ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
  Class<?> op = expr.getGenericUDF().getClass();

  // handle the logical operators
  if (op == GenericUDFOPOr.class) {
    builder.startOr();
    addChildren(expr);
    builder.end();
  } else if (op == GenericUDFOPAnd.class) {
    builder.startAnd();
    addChildren(expr);
    builder.end();
  } else if (op == GenericUDFOPNot.class) {
    builder.startNot();
    addChildren(expr);
    builder.end();
  } else if (op == GenericUDFOPEqual.class) {
    createLeaf(PredicateLeaf.Operator.EQUALS, expr);
  } else if (op == GenericUDFOPNotEqual.class) {
    builder.startNot();
    createLeaf(PredicateLeaf.Operator.EQUALS, expr);
    builder.end();
  } else if (op == GenericUDFOPEqualNS.class) {
    createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
  } else if (op == GenericUDFOPGreaterThan.class) {
    builder.startNot();
    createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
    builder.end();
  } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
    builder.startNot();
    createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
    builder.end();
  } else if (op == GenericUDFOPLessThan.class) {
    createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
  } else if (op == GenericUDFOPEqualOrLessThan.class) {
    createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
  } else if (op == GenericUDFIn.class) {
    createLeaf(PredicateLeaf.Operator.IN, expr, 0);
  } else if (op == GenericUDFBetween.class) {
    createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
  } else if (op == GenericUDFOPNull.class) {
    createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
  } else if (op == GenericUDFOPNotNull.class) {
    builder.startNot();
    createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
    builder.end();
    // otherwise, we didn't understand it, so mark it maybe
  } else {
    builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
  }
}
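Note how operators missing from the PredicateLeaf vocabulary are rewritten with NOT: leaves only know LESS_THAN and LESS_THAN_EQUALS, so x > 10 becomes NOT(x <= 10). A sketch of the equivalent SearchArgument built by hand; the column name and literal are made up, and the three-argument builder methods assumed here are the ones the equals() call above also uses:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class GreaterThanSketch {
  public static void main(String[] args) {
    // x > 10 encoded as NOT(x <= 10), mirroring the GenericUDFOPGreaterThan branch.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startNot()
        .lessThanEquals("x", PredicateLeaf.Type.LONG, 10L)
        .end()
        .build();
    System.out.println(sarg);
  }
}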
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class MapJoinProcessor, method genSelectPlan.
protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
  List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
  input.setChildOperators(null);

  // create a dummy select - this select is needed by the walker to split the
  // mapJoin later on
  RowSchema inputRS = input.getSchema();
  ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
  ArrayList<String> outputs = new ArrayList<String>();
  List<String> outputCols = input.getConf().getOutputColumnNames();
  ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < outputCols.size(); i++) {
    String internalName = outputCols.get(i);
    ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
    ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(),
        valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
    exprs.add(colDesc);
    outputs.add(internalName);
    ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(),
        valueInfo.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
    newCol.setAlias(valueInfo.getAlias());
    outputRS.add(newCol);
    colExprMap.put(internalName, colDesc);
  }
  SelectDesc select = new SelectDesc(exprs, outputs, false);
  SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
      select, new RowSchema(outputRS), input);
  sel.setColumnExprMap(colExprMap);

  // Insert the select operator in between.
  sel.setChildOperators(childOps);
  for (Operator<? extends OperatorDesc> ch : childOps) {
    ch.replaceParent(input, sel);
  }
}
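The loop builds an identity projection: one ExprNodeColumnDesc per output column of the join, copied straight from the RowSchema. A minimal sketch of the same SelectDesc construction over two hypothetical columns (the names "_col0"/"_col1", types, and table alias "t" are made up):

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IdentitySelectSketch {
  public static void main(String[] args) {
    // One column expression per output column, as in the loop above.
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    exprs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "t", false));
    exprs.add(new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "_col1", "t", false));
    // selectStar = false: an explicit column list, exactly like genSelectPlan.
    SelectDesc select = new SelectDesc(exprs, Arrays.asList("_col0", "_col1"), false);
    System.out.println(select.getColList());
  }
}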