Search in sources :

Example 21 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class MapJoinProcessor method convertMapJoin.

/**
 * convert a regular join to a a map-side join.
 *
 * @param op
 *          join operator
 * @param mapJoinPos
 *          position of the source to be read as part of map-reduce framework. All other sources
 *          are cached in memory
 * @param noCheckOuterJoin
 * @param validateMapJoinTree
 */
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
    // outer join cannot be performed on a table which is being cached
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    if (!noCheckOuterJoin) {
        if (checkMapJoin(mapJoinPos, condns) < 0) {
            throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
        }
    }
    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
    List<Operator<? extends OperatorDesc>> newParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    // found a source which is not to be stored in memory
    if (leftInputJoin) {
        // assert mapJoinPos == 0;
        Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
        assert parentOp.getParentOperators().size() == 1;
        Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
        oldReduceSinkParentOps.add(parentOp);
        newParentOps.add(grandParentOp);
    }
    byte pos = 0;
    // Remove parent reduce-sink operators
    for (String src : baseSrc) {
        if (src != null) {
            Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
            assert parentOp.getParentOperators().size() == 1;
            Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
            oldReduceSinkParentOps.add(parentOp);
            newParentOps.add(grandParentOp);
        }
        pos++;
    }
    // create the map-join operator
    MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin);
    if (mapJoinOp == null) {
        return null;
    }
    // remove old parents
    for (pos = 0; pos < newParentOps.size(); pos++) {
        newParentOps.get(pos).replaceChild(oldReduceSinkParentOps.get(pos), mapJoinOp);
    }
    mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps);
    mapJoinOp.setParentOperators(newParentOps);
    // make sure only map-joins can be performed.
    if (validateMapJoinTree) {
        validateMapJoinTypes(mapJoinOp);
    }
    return mapJoinOp;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 22 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class TestRuntimeStatsPersistence method checkPersistJoinCondDesc.

@Test
public void checkPersistJoinCondDesc() throws Exception {
    JoinCondDesc jcd = new JoinCondDesc(1, 2, 3);
    JoinCondDesc jcd2 = persistenceLoop(jcd, JoinCondDesc.class);
    assertEquals(jcd, jcd2);
}
Also used : JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) Test(org.junit.Test)

Aggregations

JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc)22 JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)12 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)11 ArrayList (java.util.ArrayList)10 List (java.util.List)9 HashMap (java.util.HashMap)8 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)8 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)8 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)7 Operator (org.apache.hadoop.hive.ql.exec.Operator)7 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)7 HashSet (java.util.HashSet)6 LinkedHashMap (java.util.LinkedHashMap)5 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)5 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)5 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)5 Set (java.util.Set)4 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)4 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)4 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)4