Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ConstantPropagateProcFactory, the method evaluateColumn:
/**
 * Evaluate a column reference, replacing it with a constant if the column is
 * deterministic and a known constant expression exists for it.
 *
 * @param desc the column expression to evaluate
 * @param cppCtx the constant-propagation context holding known constant expressions
 * @param parent the parent operator whose row schema resolves the column
 * @return the constant expression, or null if the column cannot be folded
 */
private static ExprNodeDesc evaluateColumn(ExprNodeColumnDesc desc,
    ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> parent) {
  RowSchema rs = parent.getSchema();
  // first try to resolve the column by its internal name
  ColumnInfo ci = rs.getColumnInfo(desc.getColumn());
  if (ci == null) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Reverse lookup of column " + desc + " failed!");
    }
    // fall back to resolving by table alias and column name
    ci = rs.getColumnInfo(desc.getTabAlias(), desc.getColumn());
  }
  if (ci == null) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn());
    }
    return null;
  }
  ExprNodeDesc constant = null;
  // additional work for the union operator, see union27.q
  if (ci.getAlias() == null) {
    for (Entry<ColumnInfo, ExprNodeDesc> e : cppCtx.getOpToConstantExprs().get(parent).entrySet()) {
      if (e.getKey().getInternalName().equals(ci.getInternalName())) {
        constant = e.getValue();
        break;
      }
    }
  } else {
    constant = cppCtx.getOpToConstantExprs().get(parent).get(ci);
  }
  if (constant != null) {
    if (constant instanceof ExprNodeConstantDesc
        && !constant.getTypeInfo().equals(desc.getTypeInfo())) {
      // cast so the constant's type matches the column's type
      return typeCast(constant, desc.getTypeInfo());
    }
    return constant;
  } else {
    return null;
  }
}
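The two-step lookup above (internal name first, then table alias plus column) is the standard way to resolve a column against a RowSchema. Below is a minimal self-contained sketch of that pattern; the class and helper names are illustrative, not Hive APIs:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaLookupSketch {

  // Illustrative helper mirroring evaluateColumn's fallback: try the internal
  // name first, then the (tabAlias, column) pair before giving up.
  static ColumnInfo lookupColumn(RowSchema rs, String tabAlias, String column) {
    ColumnInfo ci = rs.getColumnInfo(column);
    if (ci == null) {
      ci = rs.getColumnInfo(tabAlias, column);
    }
    return ci;
  }

  public static void main(String[] args) {
    // A one-column schema: internal name "_col0" under table alias "t".
    ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
    sig.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
    RowSchema rs = new RowSchema(sig);
    System.out.println(lookupColumn(rs, "t", "_col0"));
  }
}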
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class MapJoinProcessor, the method convertJoinOpMapJoinOp:
public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op,
    boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos,
    boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
  MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc,
      mapAliases, mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
  // the join operator's row schema is reused to build the map join operator
  RowSchema outputRS = op.getSchema();
  MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
      op.getCompilationOpContext(), mapJoinDescriptor,
      new RowSchema(outputRS.getSignature()), op.getParentOperators());
  mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
  Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
  mapJoinOp.setColumnExprMap(colExprMap);
  List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
  for (Operator<? extends OperatorDesc> childOp : childOps) {
    childOp.replaceParent(op, mapJoinOp);
  }
  mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
  mapJoinOp.setChildOperators(childOps);
  // detach the original join operator from the DAG
  op.setChildOperators(null);
  op.setParentOperators(null);
  return mapJoinOp;
}
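The rewiring that follows the schema copy is a generic splice pattern: point every child of the old operator at the new one, then detach the old operator. A hedged sketch of just that step, using only the operator methods visible above and assuming the usual Hive ql.exec and ql.plan imports (the helper name spliceOperator is illustrative, not a Hive API):

// Illustrative helper, not a Hive API: replace oldOp with newOp in the operator
// DAG, exactly as convertJoinOpMapJoinOp does for the join -> map join swap.
static void spliceOperator(Operator<? extends OperatorDesc> oldOp,
    Operator<? extends OperatorDesc> newOp) {
  List<Operator<? extends OperatorDesc>> childOps = oldOp.getChildOperators();
  for (Operator<? extends OperatorDesc> childOp : childOps) {
    // each child now reports newOp, not oldOp, as its parent
    childOp.replaceParent(oldOp, newOp);
  }
  newOp.setChildOperators(childOps);
  // detach oldOp so it drops out of the plan entirely
  oldOp.setChildOperators(null);
  oldOp.setParentOperators(null);
}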
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class OpProcFactory, the method createFilter:
protected static Object createFilter(Operator op,
    Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) {
  RowSchema inputRS = op.getSchema();
  // combine all predicates into a single expression
  List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
  Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
  while (iterator.hasNext()) {
    for (ExprNodeDesc pred : iterator.next()) {
      preds = ExprNodeDescUtils.split(pred, preds);
    }
  }
  if (preds.isEmpty()) {
    return null;
  }
  ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
  if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
    boolean pushFilterToStorage;
    HiveConf hiveConf = owi.getParseContext().getConf();
    pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
    if (pushFilterToStorage) {
      condn = pushFilterToStorageHandler((TableScanOperator) op,
          (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
      if (condn == null) {
        // we pushed the whole thing down
        return null;
      }
    }
  }
  // add new filter op
  List<Operator<? extends OperatorDesc>> originalChildren = op.getChildOperators();
  op.setChildOperators(null);
  Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(
      new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
  output.setChildOperators(originalChildren);
  for (Operator<? extends OperatorDesc> ch : originalChildren) {
    List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
    int pos = parentOperators.indexOf(op);
    assert pos != -1;
    parentOperators.remove(pos);
    // add the new filter op at the old operator's position
    parentOperators.add(pos, output);
  }
  if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
      HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    // remove the candidate filter ops
    removeCandidates(op, owi);
  }
  // push down the current ppd context to the newly added filter
  ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
  if (walkerInfo != null) {
    walkerInfo.getNonFinalCandidates().clear();
    owi.putPrunedPreds(output, walkerInfo);
  }
  return output;
}
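The split/mergePredicates pair above is what turns a bag of pushed-down predicates into one filter condition: split flattens any nested ANDs into the list, and mergePredicates rebuilds a single conjunction. A short sketch, under the assumption that two ExprNodeDesc predicates p1 and p2 are already in scope:

// Flatten p1 and p2 (each possibly an AND of sub-predicates) into one list...
List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
preds = ExprNodeDescUtils.split(p1, preds);
preds = ExprNodeDescUtils.split(p2, preds);
// ...then rebuild a single AND over all of them for the new FilterDesc.
ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);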
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ColumnStatsAutoGatherContext, the method replaceSelectOperatorProcess:
/**
 * Rewrite the select operator of the analyze statement so that its columns
 * line up with the output of the insert overwrite statement.
 *
 * @param operator the select operator in the analyze statement
 * @param input the operator right before FS in the insert overwrite statement
 * @throws HiveException
 */
private void replaceSelectOperatorProcess(SelectOperator operator,
    Operator<? extends OperatorDesc> input) throws HiveException {
  RowSchema selRS = operator.getSchema();
  ArrayList<ColumnInfo> signature = new ArrayList<>();
  OpParseContext inputCtx = sa.opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  // 1. deal with non-partition columns
  for (int i = 0; i < this.columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(i));
  }
  // deal with partition columns, if any (static, dynamic, or mixed)
  int dynamicPartBegin = -1;
  for (int i = 0; i < partitionColumns.size(); i++) {
    ExprNodeDesc exprNodeDesc = null;
    String partColName = partitionColumns.get(i).getName();
    // 2. deal with static partition columns
    if (partSpec != null && partSpec.containsKey(partColName)
        && partSpec.get(partColName) != null) {
      if (dynamicPartBegin >= 0) {
        throw new SemanticException(
            "Dynamic partition columns should not come before static partition columns.");
      }
      exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
      TypeInfo srcType = exprNodeDesc.getTypeInfo();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      if (!srcType.equals(destType)) {
        // possible when, e.g., srcType is string but destType is integer
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    } else {
      // 3. deal with dynamic partition columns
      dynamicPartBegin++;
      ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
      TypeInfo srcType = col.getType();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      exprNodeDesc = new ExprNodeColumnDesc(col);
      if (!srcType.equals(destType)) {
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    }
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(this.columns.size() + i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(this.columns.size() + i));
  }
  operator.setConf(new SelectDesc(colList, columnNames));
  operator.setColumnExprMap(columnExprMap);
  selRS.setSignature(signature);
  operator.setSchema(selRS);
}
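Every projected column above touches four parallel structures, and they must stay in step or the operator's schema and its expression map diverge. A condensed sketch of that invariant for one column at position i, with names taken from the method above (assume col and i are in scope):

// One column's bookkeeping: expression list, output names, name -> expression
// map, and the RowSchema signature must each get exactly one entry.
ExprNodeDesc expr = new ExprNodeColumnDesc(col);
colList.add(expr);                                     // SelectDesc expressions
String internalName = selRS.getColumnNames().get(i);   // operator-internal name
columnNames.add(internalName);                         // SelectDesc output names
columnExprMap.put(internalName, expr);                 // schema name -> expression
signature.add(selRS.getSignature().get(i));            // new RowSchema signature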
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class HiveGBOpConvUtil, the method genReduceSideGB2:
private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  // 1. Build GB keys, grouping set starting position
  // 1.1 First add the original GB keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0,
      gbInfo.gbKeys.size() - 1, false, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    colOutputName = gbInfo.outputColNames.get(i);
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 1.2 Add the grouping set column
  int groupingSetsPosition = -1;
  if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
    groupingSetsPosition = gbKeys.size();
    ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
    gbKeys.add(grpSetColExpr);
    colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
    colExprMap.put(colOutputName, grpSetColExpr);
  }
  // 2. Add UDAFs
  UDAFAttrs udafAttr;
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty()
      ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
  int udafStartPosInInputRS = gbInfo.grpSets.isEmpty()
      ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
    colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
    outputColNames.add(colOutputName);
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL,
        udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator,
        udafMode, aggParameters);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(),
        udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
  }
  Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL,
      outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage,
      gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr),
      new RowSchema(colInfoLst), rs);
  rsGBOp2.setColumnExprMap(colExprMap);
  // TODO: Shouldn't we propagate vc? Is it the vc col from the table, or all vc?
  return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
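All five usages reduce to the same recipe: assemble an ArrayList of ColumnInfo, wrap it in a RowSchema, and pass that schema to OperatorFactory.getAndMakeChild alongside the operator descriptor. A condensed sketch of that recipe, where gbDesc stands in for the GroupByDesc assembled above, parentOp for the reduce sink, and colExprMap for the map built above:

// Build the output schema for the new operator...
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
colInfoLst.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false));
// ...then create the operator as a child of parentOp with that schema attached.
Operator gbOp = OperatorFactory.getAndMakeChild(gbDesc, new RowSchema(colInfoLst), parentOp);
gbOp.setColumnExprMap(colExprMap);  // keep the name -> expression map in sync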