Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
From the class SemanticAnalyzer, the method genJoinOperator:
private Operator genJoinOperator(QB qb, QBJoinTree joinTree, Map<String, Operator> map, Operator joiningOp) throws SemanticException {
  QBJoinTree leftChild = joinTree.getJoinSrc();
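  // if the caller already supplies a JoinOperator, use it as the join source
  // instead of regenerating the left subtree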
  Operator joinSrcOp = joiningOp instanceof JoinOperator ? joiningOp : null;
  if (joinSrcOp == null && leftChild != null) {
    joinSrcOp = genJoinOperator(qb, leftChild, map, null);
  }
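  // apply the filter predicates that can be pushed to the join source (slot 0)
  // before the join itself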
  if (joinSrcOp != null) {
    ArrayList<ASTNode> filter = joinTree.getFiltersForPushing().get(0);
    for (ASTNode cond : filter) {
      joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, false);
    }
  }
  String[] baseSrc = joinTree.getBaseSrc();
  Operator[] srcOps = new Operator[baseSrc.length];
  // set of join inputs whose columns should be omitted from the join output
  HashSet<Integer> omitOpts = null;
  int pos = 0;
  for (String src : baseSrc) {
    if (src != null) {
      Operator srcOp = map.get(src.toLowerCase());
      // for left-semi join, generate an additional selection & group-by
      // operator before the ReduceSink
      ArrayList<ASTNode> fields = joinTree.getRHSSemijoinColumns(src);
      if (fields != null) {
        // the RHS table columns should not be output from the join
        if (omitOpts == null) {
          omitOpts = new HashSet<Integer>();
        }
        omitOpts.add(pos);
        // generate a selection operator for group-by keys only
        srcOp = insertSelectForSemijoin(fields, srcOp);
        // generate a group-by operator (HASH mode) for a map-side partial
        // aggregation for the semijoin
        srcOps[pos++] = genMapGroupByForSemijoin(qb, fields, srcOp, GroupByDesc.Mode.HASH);
      } else {
        srcOps[pos++] = srcOp;
      }
    } else {
      assert pos == 0;
      srcOps[pos++] = joinSrcOp;
    }
  }
  ExprNodeDesc[][] joinKeys = genJoinKeys(joinTree, srcOps);
  for (int i = 0; i < srcOps.length; i++) {
    // generate a ReduceSink operator for the join
    String[] srcs = baseSrc[i] != null ? new String[] { baseSrc[i] } : joinTree.getLeftAliases();
    if (!isCBOExecuted()) {
      srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]);
    }
    srcOps[i] = genJoinReduceSinkChild(qb, joinKeys[i], srcOps[i], srcs, joinTree.getNextTag());
  }
  JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts, joinKeys);
  joinOp.getConf().setQBJoinTreeProps(joinTree);
  joinContext.put(joinOp, joinTree);
  if (joinTree.getPostJoinFilters().size() != 0) {
    // Safety check for postconditions
    assert joinTree.getNoOuterJoin();
    Operator op = joinOp;
    for (ASTNode condn : joinTree.getPostJoinFilters()) {
      op = genFilterPlan(qb, condn, op, false);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + op + " with post-filtering conditions after JOIN operator");
      }
    }
    return op;
  }
  return joinOp;
}
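The recursion above bottoms out on the leftmost subtree, and each intermediate JoinOperator is fed back in as input 0 of its parent join (the null slot in baseSrc handled by the assert pos == 0 branch). A minimal, self-contained sketch of that shape, using hypothetical JoinTree/genJoin stand-ins rather than the Hive classes:

import java.util.Arrays;

public class JoinTreeWalk {

  // Hypothetical stand-in for QBJoinTree, for illustration only.
  static class JoinTree {
    JoinTree leftChild;   // like QBJoinTree.getJoinSrc()
    String[] baseSrc;     // like QBJoinTree.getBaseSrc(); a null slot = left subtree result
    JoinTree(JoinTree leftChild, String[] baseSrc) {
      this.leftChild = leftChild;
      this.baseSrc = baseSrc;
    }
  }

  static String genJoin(JoinTree tree) {
    // recurse into the left subtree first, as genJoinOperator does
    String joinSrc = tree.leftChild == null ? null : genJoin(tree.leftChild);
    String[] inputs = new String[tree.baseSrc.length];
    for (int pos = 0; pos < inputs.length; pos++) {
      // a null base source means "plug in the join generated for the left subtree"
      inputs[pos] = tree.baseSrc[pos] != null ? tree.baseSrc[pos] : joinSrc;
    }
    return "JOIN" + Arrays.toString(inputs);
  }

  public static void main(String[] args) {
    // (a JOIN b) JOIN c: the inner join feeds position 0 of the outer join
    JoinTree inner = new JoinTree(null, new String[] { "a", "b" });
    JoinTree outer = new JoinTree(inner, new String[] { null, "c" });
    // prints JOIN[JOIN[a, b], c]
    System.out.println(genJoin(outer));
  }
}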
Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
From the class SemanticAnalyzer, the method genJoinOperatorChildren:
private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, HashSet<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException {
  RowResolver outputRR = new RowResolver();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  // all children are base classes
  Operator<?>[] rightOps = new Operator[right.length];
  int outputPos = 0;
  Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
  HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
  HashMap<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
  for (int pos = 0; pos < right.length; ++pos) {
    Operator<?> input = right[pos] == null ? left : right[pos];
    if (input == null) {
      input = left;
    }
    ReduceSinkOperator rs = (ReduceSinkOperator) input;
    if (rs.getNumParent() != 1) {
      throw new SemanticException("RS should have single parent");
    }
    Operator<?> parent = rs.getParentOperators().get(0);
    ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf());
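    // value index: a non-negative entry references a ReduceSink key column;
    // a negative entry v references value column (-v - 1)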
    int[] index = rs.getValueIndex();
    ArrayList<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
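    // the ReduceSink tag identifies which join input these rows belong to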
    Byte tag = (byte) rsDesc.getTag();
    // check whether this input operator produces output
    if (omitOpts != null && omitOpts.contains(pos)) {
      exprMap.put(tag, valueDesc);
      filterMap.put(tag, filterDesc);
      rightOps[pos] = input;
      continue;
    }
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    // prepare output descriptors for the input operator
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
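    // remember which table aliases feed this join position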
    posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames()));
    List<ColumnInfo> columns = parentRR.getColumnInfos();
    for (int i = 0; i < index.length; i++) {
      ColumnInfo prev = columns.get(i);
      String[] nm = parentRR.reverseLookup(prev.getInternalName());
      String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName());
      if (outputRR.get(nm[0], nm[1]) != null) {
        continue;
      }
      ColumnInfo info = new ColumnInfo(prev);
      String field;
      if (index[i] >= 0) {
        field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
      } else {
        field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1);
      }
      String internalName = getColumnInternalName(outputColumnNames.size());
      ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
      info.setInternalName(internalName);
      colExprMap.put(internalName, desc);
      outputRR.put(nm[0], nm[1], info);
      if (nm2 != null) {
        outputRR.addMappingOnly(nm2[0], nm2[1], info);
      }
      valueDesc.add(desc);
      outputColumnNames.add(internalName);
      reversedExprs.put(internalName, tag);
    }
    for (ASTNode cond : join.getFilters().get(tag)) {
      filterDesc.add(genExprNodeDesc(cond, inputRR));
    }
    exprMap.put(tag, valueDesc);
    filterMap.put(tag, filterDesc);
    rightOps[pos] = input;
  }
  JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
  for (int i = 0; i < join.getJoinCond().length; i++) {
    JoinCond condn = join.getJoinCond()[i];
    joinCondns[i] = new JoinCondDesc(condn);
  }
  JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(), joinCondns, filterMap, joinKeys);
  desc.setReversedExprs(reversedExprs);
  desc.setFilterMap(join.getFilterMap());
  // For outer joins, add filters that apply to more than one input
  if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
    List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
    for (ASTNode cond : join.getPostJoinFilters()) {
      residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
    }
    desc.setResidualFilterExprs(residualFilterExprs);
    // Clean post-conditions
    join.getPostJoinFilters().clear();
  }
  JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc, new RowSchema(outputRR.getColumnInfos()), rightOps);
  joinOp.setColumnExprMap(colExprMap);
  joinOp.setPosToAliasMap(posToAliasMap);
  if (join.getNullSafes() != null) {
    boolean[] nullsafes = new boolean[join.getNullSafes().size()];
    for (int i = 0; i < nullsafes.length; i++) {
      nullsafes[i] = join.getNullSafes().get(i);
    }
    desc.setNullSafes(nullsafes);
  }
  return putOpInsertMap(joinOp, outputRR);
}
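The value-index arithmetic above is easy to miss. The following standalone sketch (illustrative names only, not the Hive API) shows how a non-negative index resolves to a KEY.* column reference while a negative index v resolves to VALUE column -v - 1:

import java.util.Arrays;
import java.util.List;

public class ValueIndexDemo {
  // Mirrors the decoding in genJoinOperatorChildren: a non-negative index selects a
  // ReduceSink key column, while a negative index v selects value column (-v - 1).
  static String decode(int idx, List<String> keyCols, List<String> valCols) {
    return idx >= 0
        ? "KEY." + keyCols.get(idx)
        : "VALUE." + valCols.get(-idx - 1);
  }

  public static void main(String[] args) {
    List<String> keyCols = Arrays.asList("joinkey0");
    List<String> valCols = Arrays.asList("val0", "val1");
    // index {0, -1, -2} decodes to KEY.joinkey0, VALUE.val0, VALUE.val1
    for (int idx : new int[] { 0, -1, -2 }) {
      System.out.println(decode(idx, keyCols, valCols));
    }
  }
}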