Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
The class JoinVisitor, method genJoin.
private JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions,
    List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children,
    String[] baseSrc, String tabAlias) throws SemanticException {
  // 1. Extract join type
  JoinCondDesc[] joinCondns;
  boolean semiJoin;
  boolean noOuterJoin;
  if (join instanceof HiveMultiJoin) {
    HiveMultiJoin hmj = (HiveMultiJoin) join;
    joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
    for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
      joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left,
          hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
    }
    semiJoin = false;
    noOuterJoin = !hmj.isOuterJoin();
  } else {
    joinCondns = new JoinCondDesc[1];
    JoinRelType joinRelType = JoinRelType.INNER;
    if (join instanceof Join) {
      joinRelType = ((Join) join).getJoinType();
    }
    JoinType joinType;
    switch (joinRelType) {
      case SEMI:
        joinType = JoinType.LEFTSEMI;
        semiJoin = true;
        break;
      case ANTI:
        joinType = JoinType.ANTI;
        semiJoin = true;
        break;
      default:
        assert join instanceof Join;
        joinType = transformJoinType(((Join) join).getJoinType());
        semiJoin = false;
    }
    joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
    noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER
        && joinType != JoinType.RIGHTOUTER;
  }
  // 2. We create the join aux structures
  ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
  ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
  Operator<?>[] childOps = new Operator[children.size()];
  Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
  Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
  int outputPos = 0;
  for (int pos = 0; pos < children.size(); pos++) {
    // 2.1. Backtracking from RS
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    if (inputRS.getNumParent() != 1) {
      throw new SemanticException("RS should have single parent");
    }
    Operator<?> parent = inputRS.getParentOperators().get(0);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    int[] index = inputRS.getValueIndex();
    Byte tag = (byte) rsDesc.getTag();
    // 2.1.1. If semijoin...
    if (semiJoin && pos != 0) {
      exprMap.put(tag, new ArrayList<ExprNodeDesc>());
      childOps[pos] = inputRS;
      continue;
    }
    posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos,
        outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
    List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
    for (int i = 0; i < index.length; i++) {
      ColumnInfo info = new ColumnInfo(parentColumns.get(i));
      info.setInternalName(outputColumnNames.get(outputPos));
      info.setTabAlias(tabAlias);
      outputColumns.add(info);
      reversedExprs.put(outputColumnNames.get(outputPos), tag);
      outputPos++;
    }
    exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
    colExprMap.putAll(descriptors);
    childOps[pos] = inputRS;
  }
  // 3. We populate the filters and filterMap structure needed in the join descriptor
  List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
  int[][] filterMap = new int[children.size()][];
  for (int i = 0; i < children.size(); i++) {
    filtersPerInput.add(new ArrayList<ExprNodeDesc>());
  }
  // 3. We populate the filters structure
  for (int i = 0; i < filterExpressions.size(); i++) {
    int leftPos = joinCondns[i].getLeft();
    int rightPos = joinCondns[i].getRight();
    for (ExprNodeDesc expr : filterExpressions.get(i)) {
      // We need to update the exprNode, as currently
      // they refer to columns in the output of the join;
      // they should refer to the columns output by the RS
      int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
      if (inputPos == -1) {
        inputPos = leftPos;
      }
      filtersPerInput.get(inputPos).add(expr);
      if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN
          || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN
          || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
        if (inputPos == leftPos) {
          updateFilterMap(filterMap, leftPos, rightPos);
        } else {
          updateFilterMap(filterMap, rightPos, leftPos);
        }
      }
    }
  }
  for (int pos = 0; pos < children.size(); pos++) {
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    Byte tag = (byte) rsDesc.getTag();
    filters.put(tag, filtersPerInput.get(pos));
  }
  // 4. We create the join operator with its descriptor
  JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters,
      joinExpressions, null);
  desc.setReversedExprs(reversedExprs);
  desc.setFilterMap(filterMap);
  JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(
      childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
  joinOp.setColumnExprMap(colExprMap);
  joinOp.setPosToAliasMap(posToAliasMap);
  joinOp.getConf().setBaseSrc(baseSrc);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
  }
  return joinOp;
}
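The ColumnInfo handling in step 2 above boils down to copying each parent column, renaming it to the join's output column name, and retagging it with the join alias, before the renamed columns become the join operator's RowSchema. A minimal standalone sketch of that pattern follows; the column name "key", table aliases "src" and "j", output name "_col0", and the int type are invented for illustration and are not taken from the snippet.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class JoinSchemaSketch {
  public static void main(String[] args) {
    // Pretend this ColumnInfo comes from the schema of a join input (a parent of the RS).
    ColumnInfo parentCol = new ColumnInfo("key", TypeInfoFactory.intTypeInfo, "src", false);

    // Copy it, rename it to the join output column name, and retag it with the join alias,
    // mirroring the loop over index[] in genJoin above.
    ColumnInfo outputCol = new ColumnInfo(parentCol);
    outputCol.setInternalName("_col0");
    outputCol.setTabAlias("j");

    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    outputColumns.add(outputCol);

    // The join operator's row schema is built from the renamed ColumnInfos.
    RowSchema joinSchema = new RowSchema(outputColumns);
    System.out.println(joinSchema);
  }
}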
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
The class ExprProcFactory, method getExprString.
/**
 * Get the expression string of an expression node.
 */
public static String getExprString(RowSchema rs, ExprNodeDesc expr, LineageCtx lctx,
    Operator<? extends OperatorDesc> inpOp, Predicate cond) {
  if (expr instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc col = (ExprNodeColumnDesc) expr;
    String internalName = col.getColumn();
    String alias = internalName;
    String tabAlias = col.getTabAlias();
    ColumnInfo ci = rs.getColumnInfo(internalName);
    if (ci != null) {
      if (ci.getAlias() != null) {
        alias = ci.getAlias();
      }
      if (ci.getTabAlias() != null) {
        tabAlias = ci.getTabAlias();
      }
    }
    Dependency dep = lctx.getIndex().getDependency(inpOp, internalName);
    if ((tabAlias == null || tabAlias.startsWith("_") || tabAlias.startsWith("$"))
        && (dep != null && dep.getType() == DependencyType.SIMPLE)) {
      Set<BaseColumnInfo> baseCols = dep.getBaseCols();
      if (baseCols != null && !baseCols.isEmpty()) {
        BaseColumnInfo baseCol = baseCols.iterator().next();
        tabAlias = baseCol.getTabAlias().getAlias();
        alias = baseCol.getColumn().getName();
      }
    }
    if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_")
        && !tabAlias.startsWith("$")) {
      if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) {
        cond.getBaseCols().addAll(dep.getBaseCols());
      }
      return tabAlias + "." + alias;
    }
    if (dep != null) {
      if (cond != null) {
        cond.getBaseCols().addAll(dep.getBaseCols());
      }
      if (dep.getExpr() != null) {
        return dep.getExpr();
      }
    }
    if (alias.startsWith("_")) {
      ci = inpOp.getSchema().getColumnInfo(internalName);
      if (ci != null && ci.getAlias() != null) {
        alias = ci.getAlias();
      }
    }
    return alias;
  } else if (expr instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
    List<ExprNodeDesc> children = func.getChildren();
    String[] childrenExprStrings = new String[children.size()];
    for (int i = 0; i < childrenExprStrings.length; i++) {
      childrenExprStrings[i] = getExprString(rs, children.get(i), lctx, inpOp, cond);
    }
    return func.getGenericUDF().getDisplayString(childrenExprStrings);
  }
  return expr.getExprString();
}
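The ColumnInfo lookup at the top of this method resolves the column's internal name against the row schema and prefers the user-visible alias and table alias when they are set. A minimal sketch of just that resolution step follows; the schema entry mapping internal name "_col3" to table alias "emp" and alias "name" is invented for illustration.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColumnAliasSketch {
  public static void main(String[] args) {
    // A schema entry whose internal name differs from its user-visible alias.
    ColumnInfo ci = new ColumnInfo("_col3", TypeInfoFactory.stringTypeInfo, "emp", false);
    ci.setAlias("name");

    ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
    sig.add(ci);
    RowSchema rs = new RowSchema(sig);

    // Resolve by internal name, then prefer the alias and table alias if present,
    // as getExprString does for ExprNodeColumnDesc.
    String internalName = "_col3";
    String alias = internalName;
    String tabAlias = null;
    ColumnInfo resolved = rs.getColumnInfo(internalName);
    if (resolved != null) {
      if (resolved.getAlias() != null) {
        alias = resolved.getAlias();
      }
      if (resolved.getTabAlias() != null) {
        tabAlias = resolved.getTabAlias();
      }
    }
    System.out.println(tabAlias + "." + alias); // prints emp.name
  }
}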
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
The class BucketingSortingOpProcFactory, method extractTraits.
static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop)
    throws SemanticException {
  List<ExprNodeDesc> outputValues = Collections.emptyList();
  if (childop instanceof SelectOperator) {
    SelectDesc select = ((SelectOperator) childop).getConf();
    outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
  }
  if (outputValues.isEmpty()) {
    return;
  }
  // Go through the set of partition columns, and find their representatives in the values
  // These represent the bucketed columns
  List<BucketCol> bucketCols = extractBucketCols(rop, outputValues);
  // Go through the set of key columns, and find their representatives in the values
  // These represent the sorted columns
  List<SortCol> sortCols = extractSortCols(rop, outputValues);
  List<ColumnInfo> colInfos = childop.getSchema().getSignature();
  if (!bucketCols.isEmpty()) {
    List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
    bctx.setBucketedCols(childop, newBucketCols);
  }
  if (!sortCols.isEmpty()) {
    List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
    bctx.setSortedCols(childop, newSortCols);
  }
}
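Here ColumnInfo is used only through childop.getSchema().getSignature(): the bucket and sort columns discovered against the ReduceSink values are translated back to the child operator's column names by position. A rough, hypothetical sketch of that position-to-name translation follows; the two schema entries and the bucket column position are made up, and getNewBucketCols/getNewSortCols do more bookkeeping than shown.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SignatureLookupSketch {
  public static void main(String[] args) {
    // Stand-in for childop.getSchema(): two output columns of the operator below the RS.
    ArrayList<ColumnInfo> sig = new ArrayList<ColumnInfo>();
    sig.add(new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false));
    sig.add(new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));
    RowSchema childSchema = new RowSchema(sig);

    // Translate a column position found among the RS values into the child's
    // column name via the schema signature, roughly as the helpers above do.
    int bucketColPos = 1; // hypothetical position discovered by extractBucketCols
    List<ColumnInfo> colInfos = childSchema.getSignature();
    String bucketColName = colInfos.get(bucketColPos).getInternalName();
    System.out.println("bucketed on " + bucketColName); // prints: bucketed on _col1
  }
}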
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
The class ConstraintExprGenerator, method getNotNullConstraintExpr.
private T getNotNullConstraintExpr(Table targetTable, RowResolver inputRR,
    boolean isUpdateStatement) throws SemanticException {
  boolean forceNotNullConstraint =
      conf.getBoolVar(HiveConf.ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT);
  if (!forceNotNullConstraint) {
    return null;
  }
  ImmutableBitSet nullConstraintBitSet;
  try {
    nullConstraintBitSet = getEnabledNotNullConstraints(targetTable);
  } catch (SemanticException e) {
    throw e;
  } catch (Exception e) {
    throw (new RuntimeException(e));
  }
  if (nullConstraintBitSet == null) {
    return null;
  }
  T currUDF = null;
  int constraintIdx = 0;
  List<ColumnInfo> inputColInfos = inputRR.getColumnInfos();
  for (int colExprIdx = 0; colExprIdx < inputColInfos.size(); colExprIdx++) {
    if (isUpdateStatement && colExprIdx == 0) {
      // for updates first column is _rowid
      continue;
    }
    if (nullConstraintBitSet.indexOf(constraintIdx) != -1) {
      T currExpr = typeCheckProcFactory.exprFactory.createColumnRefExpr(
          inputColInfos.get(colExprIdx), inputRR, 0);
      T isNotNullUDF = exprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
      if (currUDF != null) {
        currUDF = exprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
      } else {
        currUDF = isNotNullUDF;
      }
    }
    constraintIdx++;
  }
  return currUDF;
}
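The loop above turns each NOT NULL constrained ColumnInfo from the row resolver into a column reference wrapped in isnotnull(), AND-ing the results together. A hedged sketch of building one such predicate directly with ExprNodeColumnDesc and ExprNodeGenericFuncDesc follows; the table alias "t" and column "id" are hypothetical, and the real code goes through exprFactory and exprProcessor rather than FunctionRegistry directly.

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NotNullExprSketch {
  public static void main(String[] args) throws Exception {
    // A column that carries a NOT NULL constraint in this hypothetical table.
    ColumnInfo col = new ColumnInfo("id", TypeInfoFactory.intTypeInfo, "t", false);

    // Build a column reference over the ColumnInfo, then wrap it in isnotnull(),
    // similar in spirit to what createColumnRefExpr/getFuncExprNodeDesc produce above.
    ExprNodeDesc colRef = new ExprNodeColumnDesc(col);
    ExprNodeDesc isNotNull = ExprNodeGenericFuncDesc.newInstance(
        FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(),
        Arrays.asList(colRef));
    System.out.println(isNotNull.getExprString()); // e.g. "id is not null"
  }
}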
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
The class TypeCheckProcFactory, method processGByExpr.
/**
 * Function to do group-by subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 * "select a+b, count(1) from T group by a+b". Here a+b is already precomputed
 * in the group-by operator's key, so we substitute a+b in the select list with
 * the internal column name of the a+b expression that appears in the input row
 * resolver.
 *
 * @param nd The node that is being inspected.
 * @param procCtx The processor context.
 * @return exprNodeColumnDesc.
 */
private T processGByExpr(Node nd, Object procCtx) throws SemanticException {
  // We recursively create the exprNodeDesc. Base cases: when we encounter
  // a column ref, we convert that into an exprNodeColumnDesc; when we encounter
  // a constant, we convert that into an exprNodeConstantDesc. For others we just
  // build the exprNodeFuncDesc with recursively built children.
  ASTNode expr = (ASTNode) nd;
  TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
  // having key in (select .. where a = min(b.value)
  if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
    return null;
  }
  RowResolver input = ctx.getInputRR();
  T desc = null;
  if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
    return null;
  }
  // If the current subExpression is pre-calculated, as in Group-By etc.
  ColumnInfo colInfo = input.getExpression(expr);
  RowResolver usedRR = input;
  int offset = 0;
  // try outer row resolver
  RowResolver outerRR = ctx.getOuterRR();
  if (colInfo == null && outerRR != null) {
    colInfo = outerRR.getExpression(expr);
    usedRR = outerRR;
    offset = input.getColumnInfos().size();
  }
  if (colInfo != null) {
    desc = exprFactory.createColumnRefExpr(colInfo, usedRR, offset);
    ASTNode source = input.getExpressionSource(expr);
    if (source != null && ctx.getUnparseTranslator() != null) {
      ctx.getUnparseTranslator().addCopyTranslation(expr, source);
    }
    return desc;
  }
  return desc;
}
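The substitution described in the javadoc works because the group-by operator's row resolver already maps the AST of a+b to a ColumnInfo with an internal name; the processor then simply emits a column reference to that ColumnInfo instead of re-evaluating the expression. A small illustrative sketch, assuming the internal name "_col0" and the alias "(a + b)"; the real code obtains the ColumnInfo via RowResolver.getExpression and builds the reference through exprFactory.createColumnRefExpr.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByElimSketch {
  public static void main(String[] args) {
    // For "select a+b, count(1) from T group by a+b", the group-by operator already
    // exposes a+b as an internal column (here assumed to be called "_col0").
    ColumnInfo precomputed = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, null, false);
    precomputed.setAlias("(a + b)");

    // Instead of re-evaluating a+b, the select list gets a plain column reference
    // to that internal column, which is what createColumnRefExpr produces.
    ExprNodeColumnDesc colRef = new ExprNodeColumnDesc(precomputed);
    System.out.println(colRef.getExprString()); // prints the internal column reference
  }
}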