
Example 6 with JoinCondDesc

Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

The class MapJoinProcessor, method getMapJoinDesc.

public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    Byte[] tagOrder = desc.getTagOrder();
    // outer join cannot be performed on a table which is being cached
    if (!noCheckOuterJoin) {
        if (checkMapJoin(mapJoinPos, condns) < 0) {
            throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
        }
    }
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
    Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
    Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
    ObjectPair<List<ReduceSinkOperator>, Map<Byte, List<ExprNodeDesc>>> pair = getKeys(leftInputJoin, baseSrc, op);
    List<ReduceSinkOperator> oldReduceSinkParentOps = pair.getFirst();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
        byte tag = entry.getKey();
        Operator<?> terminal = oldReduceSinkParentOps.get(tag);
        List<ExprNodeDesc> values = entry.getValue();
        List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
        newValueExprs.put(tag, newValues);
        for (int i = 0; i < schema.size(); i++) {
            ColumnInfo column = schema.get(i);
            if (column == null) {
                continue;
            }
            ExprNodeDesc expr = colExprMap.get(column.getInternalName());
            int index = ExprNodeDescUtils.indexOf(expr, values);
            if (index >= 0) {
                colExprMap.put(column.getInternalName(), newValues.get(index));
                schema.set(i, null);
            }
        }
    }
    // rewrite value index for mapjoin
    Map<Byte, int[]> valueIndices = new HashMap<Byte, int[]>();
    // get the join keys from old parent ReduceSink operators
    Map<Byte, List<ExprNodeDesc>> keyExprMap = pair.getSecond();
    if (!adjustParentsChildren) {
        // Since we did not remove reduce sink parents, keep the original value expressions
        newValueExprs = valueExprs;
        // Join key exprs are represented in terms of the original table columns,
        // we need to convert these to the generated column names we can see in the Join operator
        Map<Byte, List<ExprNodeDesc>> newKeyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
        for (Map.Entry<Byte, List<ExprNodeDesc>> mapEntry : keyExprMap.entrySet()) {
            Byte pos = mapEntry.getKey();
            ReduceSinkOperator rsParent = oldReduceSinkParentOps.get(pos.byteValue());
            List<ExprNodeDesc> keyExprList = ExprNodeDescUtils.resolveJoinKeysAsRSColumns(mapEntry.getValue(), rsParent);
            if (keyExprList == null) {
                throw new SemanticException("Error resolving join keys");
            }
            newKeyExprMap.put(pos, keyExprList);
        }
        keyExprMap = newKeyExprMap;
    }
    // construct valueTableDescs and valueFilteredTableDescs
    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    List<TableDesc> valueFilteredTableDescs = new ArrayList<TableDesc>();
    int[][] filterMap = desc.getFilterMap();
    for (byte pos = 0; pos < op.getParentOperators().size(); pos++) {
        List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
        if (pos != mapJoinPos) {
            // remove values in key exprs for value table schema
            // value expression for hashsink will be modified in
            // LocalMapJoinProcessor
            int[] valueIndex = new int[valueCols.size()];
            List<ExprNodeDesc> valueColsInValueExpr = new ArrayList<ExprNodeDesc>();
            for (int i = 0; i < valueIndex.length; i++) {
                ExprNodeDesc expr = valueCols.get(i);
                int kindex = ExprNodeDescUtils.indexOf(expr, keyExprMap.get(pos));
                if (kindex >= 0) {
                    valueIndex[i] = kindex;
                } else {
                    valueIndex[i] = -valueColsInValueExpr.size() - 1;
                    valueColsInValueExpr.add(expr);
                }
            }
            if (needValueIndex(valueIndex)) {
                valueIndices.put(pos, valueIndex);
            }
            valueCols = valueColsInValueExpr;
        }
        // deep copy expr node desc
        List<ExprNodeDesc> valueFilteredCols = ExprNodeDescUtils.clone(valueCols);
        if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
            ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false);
            valueFilteredCols.add(isFilterDesc);
        }
        TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
        TableDesc valueFilteredTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueFilteredCols, "mapjoinvalue"));
        valueTableDescs.add(valueTableDesc);
        valueFilteredTableDescs.add(valueFilteredTableDesc);
    }
    Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
        byte srcTag = entry.getKey();
        List<ExprNodeDesc> filter = entry.getValue();
        Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
        newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
    }
    desc.setFilters(filters = newFilters);
    // create dumpfile prefix needed to create descriptor
    String dumpFilePrefix = "";
    if (mapAliases != null) {
        for (String mapAlias : mapAliases) {
            dumpFilePrefix = dumpFilePrefix + mapAlias;
        }
        dumpFilePrefix = dumpFilePrefix + "-" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    } else {
        dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    }
    List<ExprNodeDesc> keyCols = keyExprMap.get((byte) mapJoinPos);
    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    JoinCondDesc[] joinCondns = op.getConf().getConds();
    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs, valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());
    mapJoinDescriptor.setResidualFilterExprs(desc.getResidualFilterExprs());
    if (!valueIndices.isEmpty()) {
        mapJoinDescriptor.setValueIndices(valueIndices);
    }
    return mapJoinDescriptor;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)
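
A minimal sketch of the outer-join guard used above. It assumes the JoinCondDesc(int left, int right, int type) constructor and that MapJoinProcessor.checkMapJoin is publicly callable; the negative-return-means-rejected reading follows from the check in getMapJoinDesc itself, and the snippet is illustrative rather than part of the Hive sources.

import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class CheckMapJoinSketch {
    public static void main(String[] args) {
        // Inner join between inputs 0 and 1: either position may be the streamed (big) table.
        JoinCondDesc[] inner = { new JoinCondDesc(0, 1, JoinDesc.INNER_JOIN) };
        // Left outer join: only the preserved left side (position 0) may be streamed.
        JoinCondDesc[] leftOuter = { new JoinCondDesc(0, 1, JoinDesc.LEFT_OUTER_JOIN) };

        System.out.println(MapJoinProcessor.checkMapJoin(0, inner));     // expected: non-negative
        System.out.println(MapJoinProcessor.checkMapJoin(1, leftOuter)); // expected: negative, i.e. the NO_OUTER_MAPJOIN case
    }
}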

Example 7 with JoinCondDesc

Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

The class MapJoinProcessor, method convertMapJoin.

/**
   * Convert a regular join to a map-side join.
   *
   * @param opParseCtxMap
   * @param op
   *          join operator
   * @param joinTree
   *          qb join tree
   * @param mapJoinPos
   *          position of the source to be read as part of map-reduce framework. All other sources
   *          are cached in memory
   * @param noCheckOuterJoin
   * @param validateMapJoinTree
   */
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
    // outer join cannot be performed on a table which is being cached
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    if (!noCheckOuterJoin) {
        if (checkMapJoin(mapJoinPos, condns) < 0) {
            throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
        }
    }
    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
    List<Operator<? extends OperatorDesc>> newParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    // found a source which is not to be stored in memory
    if (leftInputJoin) {
        // assert mapJoinPos == 0;
        Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
        assert parentOp.getParentOperators().size() == 1;
        Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
        oldReduceSinkParentOps.add(parentOp);
        newParentOps.add(grandParentOp);
    }
    byte pos = 0;
    // Remove parent reduce-sink operators
    for (String src : baseSrc) {
        if (src != null) {
            Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
            assert parentOp.getParentOperators().size() == 1;
            Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
            oldReduceSinkParentOps.add(parentOp);
            newParentOps.add(grandParentOp);
        }
        pos++;
    }
    // create the map-join operator
    MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin);
    // remove old parents
    for (pos = 0; pos < newParentOps.size(); pos++) {
        newParentOps.get(pos).replaceChild(oldReduceSinkParentOps.get(pos), mapJoinOp);
    }
    mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps);
    mapJoinOp.setParentOperators(newParentOps);
    // make sure only map-joins can be performed.
    if (validateMapJoinTree) {
        validateMapJoinTypes(mapJoinOp);
    }
    return mapJoinOp;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
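
A hedged usage sketch of this conversion, wrapped in a helper so it compiles standalone; the JoinOperator, source aliases and big-table position are assumed to come from earlier planning steps, and the no-arg MapJoinProcessor constructor is assumed to be accessible. It is illustrative only, not a recipe from the Hive sources.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

import java.util.List;

public class ConvertMapJoinSketch {
    // Rewrites a shuffle join into a map join, streaming the input at bigTablePos
    // and caching the remaining inputs; the outer-join safety check stays enabled.
    static MapJoinOperator rewriteToMapJoin(HiveConf conf, JoinOperator joinOp,
            String[] baseSrc, List<String> mapAliases, int bigTablePos) throws SemanticException {
        MapJoinProcessor processor = new MapJoinProcessor();
        return processor.convertMapJoin(conf, joinOp,
                false,          // leftInputJoin
                baseSrc,        // source alias per input position
                mapAliases,     // aliases of the inputs to cache in memory
                bigTablePos,    // position of the streamed (big) table
                false,          // noCheckOuterJoin: keep the NO_OUTER_MAPJOIN guard
                true);          // validateMapJoinTree
    }
}

After the call, the old ReduceSink parents have been unplugged and the grandparent operators feed the new MapJoinOperator directly, since the small-table sides will be loaded as in-memory hash tables instead of arriving through a shuffle.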

Example 8 with JoinCondDesc

Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

The class MapJoinOperator, method canSkipJoinProcessing.

// If the loaded hash table is empty, for some conditions we can skip processing the big table rows.
protected boolean canSkipJoinProcessing(ExecMapperContext mapContext) {
    if (!canSkipReload(mapContext)) {
        return false;
    }
    JoinCondDesc[] joinConds = getConf().getConds();
    if (joinConds.length > 0) {
        for (JoinCondDesc joinCond : joinConds) {
            if (joinCond.getType() != JoinDesc.INNER_JOIN) {
                return false;
            }
        }
    } else {
        return false;
    }
    boolean skipJoinProcessing = false;
    for (int idx = 0; idx < mapJoinTables.length; ++idx) {
        if (idx == getConf().getPosBigTable()) {
            continue;
        }
        MapJoinTableContainer mapJoinTable = mapJoinTables[idx];
        if (mapJoinTable.size() == 0) {
            // If any table is empty, an inner join involving the tables should yield 0 rows.
            LOG.info("Hash table number " + idx + " is empty");
            skipJoinProcessing = true;
            break;
        }
    }
    return skipJoinProcessing;
}
Also used : MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)
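
The rule being applied here can be restated outside the Hive API. The sketch below is a standalone paraphrase (not Hive code): skipping is safe only when every join condition is an inner join and at least one cached small table is empty, because an empty build side makes an inner join yield zero rows, while outer or semi joins could still emit output.

public class SkipJoinSketch {

    // Standalone restatement of the decision in canSkipJoinProcessing above.
    static boolean canSkip(boolean allConditionsInner, long[] smallTableRowCounts) {
        if (!allConditionsInner) {
            return false;  // an outer/semi join may still produce rows for an empty side
        }
        for (long rows : smallTableRowCounts) {
            if (rows == 0) {
                return true;   // empty build side: the inner join produces no output
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(canSkip(true, new long[] { 10, 0 }));   // true
        System.out.println(canSkip(false, new long[] { 10, 0 }));  // false
    }
}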

Example 9 with JoinCondDesc

Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

The class HiveOpConverter, method genJoin.

private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        semiJoin = join instanceof SemiJoin;
        JoinType joinType;
        if (semiJoin) {
            joinType = JoinType.LEFTSEMI;
        } else {
            joinType = extractJoinType((Join) join);
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3. We populate the filters structure
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)
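
For a plain two-way join, the non-HiveMultiJoin branch above produces a single JoinCondDesc relating input 0 to input 1. The following illustrative construction mirrors that branch with hypothetical values (a LEFT OUTER join) and only uses constructors and getters that appear in the method above:

import org.apache.hadoop.hive.ql.parse.JoinCond;
import org.apache.hadoop.hive.ql.parse.JoinType;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;

public class JoinCondSketch {
    public static void main(String[] args) {
        // One condition joining input position 0 (left) with position 1 (right) as LEFT OUTER.
        JoinCondDesc[] joinCondns = new JoinCondDesc[1];
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, JoinType.LEFTOUTER));
        // As in genJoin, any outer join type turns off the no-outer-join fast path.
        boolean noOuterJoin = false;
        System.out.println("type=" + joinCondns[0].getType()
                + " left=" + joinCondns[0].getLeft()
                + " right=" + joinCondns[0].getRight()
                + " noOuterJoin=" + noOuterJoin);
    }
}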

Example 10 with JoinCondDesc

Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

The class CommonJoinOperator, method genObject.

// creates objects in recursive manner
private void genObject(int aliasNum, boolean allLeftFirst, boolean allLeftNull) throws HiveException {
    JoinCondDesc joinCond = condn[aliasNum - 1];
    int type = joinCond.getType();
    int left = joinCond.getLeft();
    int right = joinCond.getRight();
    if (needsPostEvaluation && aliasNum == numAliases - 2) {
        int nextType = condn[aliasNum].getType();
        if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
            // Initialize container to use for storing tuples before emitting them
            rowContainerPostFilteredOuterJoin = new HashMap<>();
        }
    }
    boolean[] skip = skipVectors[aliasNum];
    boolean[] prevSkip = skipVectors[aliasNum - 1];
    // search for match in the rhs table
    AbstractRowContainer<List<Object>> aliasRes = storage[order[aliasNum]];
    boolean needToProduceLeftRow = false;
    boolean producedRow = false;
    boolean done = false;
    boolean loopAgain = false;
    boolean tryLOForFO = type == JoinDesc.FULL_OUTER_JOIN;
    boolean rightFirst = true;
    AbstractRowContainer.RowIterator<List<Object>> iter = aliasRes.rowIter();
    int pos = 0;
    for (List<Object> rightObj = iter.first(); !done && rightObj != null; rightObj = loopAgain ? rightObj : iter.next(), rightFirst = loopAgain = false, pos++) {
        System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
        boolean rightNull = rightObj == dummyObj[aliasNum];
        if (hasFilter(order[aliasNum])) {
            filterTags[aliasNum] = getFilterTag(rightObj);
        }
        skip[right] = rightNull;
        if (type == JoinDesc.INNER_JOIN) {
            innerJoin(skip, left, right);
        } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
            if (innerJoin(skip, left, right)) {
                // if left-semi-join found a match, skipping the rest of the rows in the
                // rhs table of the semijoin
                done = true;
            }
        } else if (type == JoinDesc.LEFT_OUTER_JOIN || (type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
            int result = leftOuterJoin(skip, left, right);
            if (result < 0) {
                continue;
            }
            done = result > 0;
        } else if (type == JoinDesc.RIGHT_OUTER_JOIN || (type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) {
            if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) {
                continue;
            }
        } else if (type == JoinDesc.FULL_OUTER_JOIN) {
            if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) {
                loopAgain = allLeftFirst;
                done = !loopAgain;
                tryLOForFO = false;
            } else if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) {
                continue;
            }
        }
        intermediate[aliasNum] = rightObj;
        if (aliasNum == numAliases - 1) {
            if (!(allLeftNull && rightNull)) {
                needToProduceLeftRow = true;
                if (needsPostEvaluation) {
                    // This is only executed for outer joins with residual filters
                    boolean forward = createForwardJoinObject(skipVectors[numAliases - 1]);
                    producedRow |= forward;
                    if (!rightNull && (type == JoinDesc.RIGHT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN)) {
                        if (forward) {
                            // This record produced a result this time, remove it from the storage
                            // as it will not need to produce a result with NULL values anymore
                            rowContainerPostFilteredOuterJoin.put(pos, null);
                        } else {
                            // we should produce a result
                            if (!rowContainerPostFilteredOuterJoin.containsKey(pos)) {
                                Object[] row = Arrays.copyOfRange(forwardCache, offsets[aliasNum], offsets[aliasNum + 1]);
                                rowContainerPostFilteredOuterJoin.put(pos, row);
                            }
                        }
                    }
                } else {
                    createForwardJoinObject(skipVectors[numAliases - 1]);
                }
            }
        } else {
            // recursively call the join the other rhs tables
            genObject(aliasNum + 1, allLeftFirst && rightFirst, allLeftNull && rightNull);
        }
    }
    // Consolidation for outer joins
    if (needsPostEvaluation && aliasNum == numAliases - 1 && needToProduceLeftRow && !producedRow && !allLeftNull) {
        if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN) {
            // If it is a LEFT / FULL OUTER JOIN and the left record did not produce
            // results, we need to take that record, replace the right side with NULL
            // values, and produce the records
            int i = numAliases - 1;
            for (int j = offsets[i]; j < offsets[i + 1]; j++) {
                forwardCache[j] = null;
            }
            internalForward(forwardCache, outputObjInspector);
            countAfterReport = 0;
        }
    } else if (needsPostEvaluation && aliasNum == numAliases - 2) {
        int nextType = condn[aliasNum].getType();
        if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
            // If it is a RIGHT / FULL OUTER JOIN, we need to iterate through the row container
            // that contains all the right records that did not produce results. Then, for each
            // of those records, we replace the left side with NULL values, and produce the
            // records.
            // Observe that we only enter this block when we have finished iterating through
            // all the left and right records (aliasNum == numAliases - 2), and thus, we have
            // tried to evaluate the post-filter condition on every possible combination.
            Arrays.fill(forwardCache, null);
            for (Object[] row : rowContainerPostFilteredOuterJoin.values()) {
                if (row == null) {
                    continue;
                }
                System.arraycopy(row, 0, forwardCache, offsets[numAliases - 1], row.length);
                internalForward(forwardCache, outputObjInspector);
                countAfterReport = 0;
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) AbstractRowContainer(org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)

Aggregations

JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc) 13
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 7
ArrayList (java.util.ArrayList) 6
JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc) 6
List (java.util.List) 5
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 5
Operator (org.apache.hadoop.hive.ql.exec.Operator) 5
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 5
HashMap (java.util.HashMap) 4
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 4
MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc) 4
HashSet (java.util.HashSet) 3
LinkedHashMap (java.util.LinkedHashMap) 3
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 3
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 3
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) 3
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 3
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 3
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc) 3
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) 3