use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.
the class MapJoinProcessor method convertMapJoin.
/**
* convert a regular join to a a map-side join.
*
* @param op
* join operator
* @param mapJoinPos
* position of the source to be read as part of map-reduce framework. All other sources
* are cached in memory
* @param noCheckOuterJoin
* @param validateMapJoinTree
*/
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
// outer join cannot be performed on a table which is being cached
JoinDesc desc = op.getConf();
JoinCondDesc[] condns = desc.getConds();
if (!noCheckOuterJoin) {
if (checkMapJoin(mapJoinPos, condns) < 0) {
throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
}
}
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
// The join outputs a concatenation of all the inputs.
List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
List<Operator<? extends OperatorDesc>> newParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
// found a source which is not to be stored in memory
if (leftInputJoin) {
// assert mapJoinPos == 0;
Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
byte pos = 0;
// Remove parent reduce-sink operators
for (String src : baseSrc) {
if (src != null) {
Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
pos++;
}
// create the map-join operator
MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin);
if (mapJoinOp == null) {
return null;
}
// remove old parents
for (pos = 0; pos < newParentOps.size(); pos++) {
newParentOps.get(pos).replaceChild(oldReduceSinkParentOps.get(pos), mapJoinOp);
}
mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps);
mapJoinOp.setParentOperators(newParentOps);
// make sure only map-joins can be performed.
if (validateMapJoinTree) {
validateMapJoinTypes(mapJoinOp);
}
return mapJoinOp;
}
use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.
the class TestRuntimeStatsPersistence method checkPersistJoinCondDesc.
@Test
public void checkPersistJoinCondDesc() throws Exception {
JoinCondDesc jcd = new JoinCondDesc(1, 2, 3);
JoinCondDesc jcd2 = persistenceLoop(jcd, JoinCondDesc.class);
assertEquals(jcd, jcd2);
}
Aggregations