use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.
the class CommonMergeJoinOperator method getFetchInputAtCloseList.
/*
 * In the case of outer joins, we need to push records through even if one of the sides has
 * finished sending records. E.g., in a full outer join, the right side must keep sending data
 * for the join even after the left side has sent all of its records. This set can be computed
 * once at initialize time; at close, these tags will keep forwarding records until they have
 * none left to send. Subsequent joins need to fetch their data as well, since any join
 * following the outer join could produce results with one of the outer sides, depending on
 * the join condition. We could optimize the inner-join case here in the future.
 */
private Set<Integer> getFetchInputAtCloseList() {
  Set<Integer> retval = new TreeSet<Integer>();
  for (JoinCondDesc joinCondDesc : conf.getConds()) {
    retval.add(joinCondDesc.getLeft());
    retval.add(joinCondDesc.getRight());
  }
  return retval;
}
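To make the effect concrete, here is a minimal, self-contained sketch (not part of the Hive source) that applies the same logic to a hand-built JoinCondDesc array; the join shape, tag positions, and class name are assumptions for illustration only.

import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class FetchAtCloseDemo {
  public static void main(String[] args) {
    // Hypothetical conds for "a JOIN b LEFT OUTER JOIN c" (tags 0, 1, 2).
    JoinCondDesc[] conds = new JoinCondDesc[] {
        new JoinCondDesc(0, 1, JoinDesc.INNER_JOIN),
        new JoinCondDesc(1, 2, JoinDesc.LEFT_OUTER_JOIN)
    };
    // Same logic as getFetchInputAtCloseList: both sides of every
    // join condition end up in the fetch-at-close set.
    Set<Integer> fetchAtClose = new TreeSet<Integer>();
    for (JoinCondDesc cond : conds) {
      fetchAtClose.add(cond.getLeft());
      fetchAtClose.add(cond.getRight());
    }
    System.out.println(fetchAtClose); // [0, 1, 2]
  }
}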
use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.
the class AbstractSMBJoinProc method canConvertJoinToBucketMapJoin.
// Can the join operator be converted to a bucket map-merge join operator?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context)
    throws SemanticException {
  // This has already been inspected and rejected
  if (context.getRejectedJoinOps().contains(joinOp)) {
    return false;
  }
  if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
    return false;
  }
  Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
  try {
    String selector = HiveConf.getVar(pGraphContext.getConf(),
        HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
    bigTableMatcherClass = JavaUtils.loadClass(selector);
  } catch (ClassNotFoundException e) {
    throw new SemanticException(e.getMessage());
  }
  BigTableSelectorForAutoSMJ bigTableMatcher =
      ReflectionUtils.newInstance(bigTableMatcherClass, null);
  JoinDesc joinDesc = joinOp.getConf();
  JoinCondDesc[] joinCondns = joinDesc.getConds();
  Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
  if (joinCandidates.isEmpty()) {
    // This is a full outer join: it cannot be a map join of any type. So return false.
    return false;
  }
  int bigTablePosition =
      bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
  if (bigTablePosition < 0) {
    // contains aliases from sub-query
    return false;
  }
  context.setBigTablePosition(bigTablePosition);
  String joinAlias = bigTablePosition == 0
      ? joinOp.getConf().getLeftAlias()
      : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
  joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
  Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
  // get the join keys from parent ReduceSink operators
  for (Operator<? extends OperatorDesc> parentOp : parentOps) {
    ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
    Byte tag = (byte) rsconf.getTag();
    List<ExprNodeDesc> keys = rsconf.getKeyCols();
    keyExprMap.put(tag, keys);
  }
  context.setKeyExprMap(keyExprMap);
  // Make a deep copy of the aliases so that they are not changed in the context
  String[] joinSrcs = joinOp.getConf().getBaseSrc();
  String[] srcs = new String[joinSrcs.length];
  for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
    joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
    srcs[srcPos] = new String(joinSrcs[srcPos]);
  }
  // Given the candidate big table chosen by the pluggable big-table matcher,
  // check whether the join can actually be converted.
  return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(),
      keyExprMap, joinAlias, Arrays.asList(srcs));
}
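For context, getBigTableCandidates inspects the JoinCondDesc types to decide which inputs are allowed to stream as the big table. A minimal sketch of a call (not from the Hive source; the join shape and class name are assumed for illustration):

import java.util.Set;

import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class BigTableCandidatesDemo {
  public static void main(String[] args) {
    // Hypothetical cond for "a LEFT OUTER JOIN b": only the preserved left
    // side may stream as the big table, so the candidate set is {0}.
    JoinCondDesc[] conds = new JoinCondDesc[] {
        new JoinCondDesc(0, 1, JoinDesc.LEFT_OUTER_JOIN)
    };
    Set<Integer> candidates = MapJoinProcessor.getBigTableCandidates(conds);
    System.out.println(candidates); // [0]
    // A FULL OUTER JOIN would yield an empty set, which is exactly the
    // case the method above rejects.
  }
}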
use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.
the class SemanticAnalyzer method genJoinOperatorChildren.
private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right,
    HashSet<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException {
  RowResolver outputRR = new RowResolver();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  // all children are base classes
  Operator<?>[] rightOps = new Operator[right.length];
  int outputPos = 0;
  Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
  HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
  HashMap<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
  for (int pos = 0; pos < right.length; ++pos) {
    Operator<?> input = right[pos] == null ? left : right[pos];
    if (input == null) {
      input = left;
    }
    ReduceSinkOperator rs = (ReduceSinkOperator) input;
    if (rs.getNumParent() != 1) {
      throw new SemanticException("RS should have single parent");
    }
    Operator<?> parent = rs.getParentOperators().get(0);
    ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf());
    int[] index = rs.getValueIndex();
    ArrayList<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
    Byte tag = (byte) rsDesc.getTag();
    // check whether this input operator produces output
    if (omitOpts != null && omitOpts.contains(pos)) {
      exprMap.put(tag, valueDesc);
      filterMap.put(tag, filterDesc);
      rightOps[pos] = input;
      continue;
    }
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    // prepare output descriptors for the input operator
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
    posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames()));
    List<ColumnInfo> columns = parentRR.getColumnInfos();
    for (int i = 0; i < index.length; i++) {
      ColumnInfo prev = columns.get(i);
      String[] nm = parentRR.reverseLookup(prev.getInternalName());
      String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName());
      if (outputRR.get(nm[0], nm[1]) != null) {
        continue;
      }
      ColumnInfo info = new ColumnInfo(prev);
      String field;
      if (index[i] >= 0) {
        field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
      } else {
        field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1);
      }
      String internalName = getColumnInternalName(outputColumnNames.size());
      ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field,
          info.getTabAlias(), info.getIsVirtualCol());
      info.setInternalName(internalName);
      colExprMap.put(internalName, desc);
      outputRR.put(nm[0], nm[1], info);
      if (nm2 != null) {
        outputRR.addMappingOnly(nm2[0], nm2[1], info);
      }
      valueDesc.add(desc);
      outputColumnNames.add(internalName);
      reversedExprs.put(internalName, tag);
    }
    for (ASTNode cond : join.getFilters().get(tag)) {
      filterDesc.add(genExprNodeDesc(cond, inputRR));
    }
    exprMap.put(tag, valueDesc);
    filterMap.put(tag, filterDesc);
    rightOps[pos] = input;
  }
  JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
  for (int i = 0; i < join.getJoinCond().length; i++) {
    JoinCond condn = join.getJoinCond()[i];
    joinCondns[i] = new JoinCondDesc(condn);
  }
  JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(),
      joinCondns, filterMap, joinKeys);
  desc.setReversedExprs(reversedExprs);
  desc.setFilterMap(join.getFilterMap());
  // For outer joins, add filters that apply to more than one input
  if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
    List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
    for (ASTNode cond : join.getPostJoinFilters()) {
      residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
    }
    desc.setResidualFilterExprs(residualFilterExprs);
    // Clean post-conditions
    join.getPostJoinFilters().clear();
  }
  JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc,
      new RowSchema(outputRR.getColumnInfos()), rightOps);
  joinOp.setColumnExprMap(colExprMap);
  joinOp.setPosToAliasMap(posToAliasMap);
  if (join.getNullSafes() != null) {
    boolean[] nullsafes = new boolean[join.getNullSafes().size()];
    for (int i = 0; i < nullsafes.length; i++) {
      nullsafes[i] = join.getNullSafes().get(i);
    }
    desc.setNullSafes(nullsafes);
  }
  return putOpInsertMap(joinOp, outputRR);
}
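The JoinCondDesc array built above is what later optimizer passes (such as the big-table candidate selection shown earlier) consume. A minimal sketch of the accessors on a single condition (not from the Hive source; the join shape and class name are assumed for illustration):

import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class JoinCondDescDemo {
  public static void main(String[] args) {
    // Hypothetical condition mirroring what the loop above would build for
    // the second condition of "t0 JOIN t1 LEFT OUTER JOIN t2" (positions 0, 1, 2).
    JoinCondDesc cond = new JoinCondDesc(1, 2, JoinDesc.LEFT_OUTER_JOIN);
    System.out.println(cond.getLeft());                           // 1
    System.out.println(cond.getRight());                          // 2
    System.out.println(cond.getType() == JoinDesc.LEFT_OUTER_JOIN); // true
  }
}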