Search in sources :

Example 1 with AbstractRowContainer

Use of org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer in the Apache Hive project.

The genObject method of the CommonJoinOperator class.

/**
 * Recursively builds join output rows, handling one alias (join input) per recursion level.
 *
 * <p>For the alias at {@code aliasNum} this method iterates the rows held in
 * {@code storage[order[aliasNum]]}, applies the join condition {@code condn[aliasNum - 1]} to
 * update the skip vector for this level, stores the current row in
 * {@code intermediate[aliasNum]}, and then either recurses into {@code aliasNum + 1} or, when
 * this is the last alias ({@code numAliases - 1}), calls {@code createForwardJoinObject}.
 *
 * <p>When {@code needsPostEvaluation} is set, two extra pieces of bookkeeping happen:
 * <ul>
 *   <li>at the last alias, right-side rows of RIGHT/FULL OUTER joins are tracked by loop
 *       position in {@code rowContainerPostFilteredOuterJoin} (a {@code null} value marks a
 *       position that already produced output; a non-null value is a saved copy of that
 *       row's slice of {@code forwardCache});</li>
 *   <li>after the loop, rows are forwarded with one side replaced by {@code null}s for
 *       left rows that produced nothing (last alias) or for tracked right rows that never
 *       produced output (second-to-last alias).</li>
 * </ul>
 *
 * @param aliasNum index of the alias processed at this level; {@code condn[aliasNum - 1]} and
 *        {@code skipVectors[aliasNum - 1]} are read, so it must be at least 1
 * @param allLeftFirst for recursive calls this is {@code allLeftFirst && rightFirst} of the
 *        caller, i.e. true only while every enclosing level is still on its first loop pass
 * @param allLeftNull for recursive calls this is {@code allLeftNull && rightNull} of the
 *        caller, i.e. true only while every enclosing level's current row is its dummy object
 * @throws HiveException declared by this method; presumably raised by the helpers it calls
 *         (their bodies are not visible here)
 */
private void genObject(int aliasNum, boolean allLeftFirst, boolean allLeftNull) throws HiveException {
    // The join condition linking this alias to the previous one lives at index aliasNum - 1.
    JoinCondDesc joinCond = condn[aliasNum - 1];
    int type = joinCond.getType();
    int left = joinCond.getLeft();
    int right = joinCond.getRight();
    // At the second-to-last alias, look ahead at the join type of the last alias and, for
    // RIGHT / FULL OUTER joins, start with a fresh tracking map for this pass.
    if (needsPostEvaluation && aliasNum == numAliases - 2) {
        int nextType = condn[aliasNum].getType();
        if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
            // Initialize container to use for storing tuples before emitting them
            rowContainerPostFilteredOuterJoin = new HashMap<>();
        }
    }
    // skip is this level's working vector; it is re-seeded from the previous level's vector
    // at the top of every loop iteration.
    boolean[] skip = skipVectors[aliasNum];
    boolean[] prevSkip = skipVectors[aliasNum - 1];
    // search for match in the rhs table
    AbstractRowContainer<List<Object>> aliasRes = storage[order[aliasNum]];
    // Set when the last alias reached the forwarding step for a row that is not all-null.
    boolean needToProduceLeftRow = false;
    // Set when createForwardJoinObject returned true at least once (post-evaluation path only).
    boolean producedRow = false;
    // Loop exit flag; checked before the null test in the loop condition.
    boolean done = false;
    // When true, the loop update keeps the same rightObj instead of advancing the iterator.
    boolean loopAgain = false;
    // For FULL OUTER joins: whether the left-outer attempt is still pending (tried at most
    // until it first returns a positive result).
    boolean tryLOForFO = type == JoinDesc.FULL_OUTER_JOIN;
    // True only during the first loop pass; cleared by the loop update clause.
    boolean rightFirst = true;
    AbstractRowContainer.RowIterator<List<Object>> iter = aliasRes.rowIter();
    // Counts loop passes; used as the key into rowContainerPostFilteredOuterJoin.
    // NOTE(review): pos is also incremented when loopAgain replays the same row, so it is a
    // pass counter rather than strictly a row index - confirm this is intended for the keys.
    int pos = 0;
    // The update clause runs after every pass, including passes ended by 'continue': it
    // advances the iterator (unless loopAgain), clears rightFirst and loopAgain, and bumps pos.
    for (List<Object> rightObj = iter.first(); !done && rightObj != null; rightObj = loopAgain ? rightObj : iter.next(), rightFirst = loopAgain = false, pos++) {
        // Start from the previous level's skip state for each candidate row.
        System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
        // Identity comparison: the row is "null" only if it is this alias's dummy object.
        boolean rightNull = rightObj == dummyObj[aliasNum];
        if (hasFilter(order[aliasNum])) {
            filterTags[aliasNum] = getFilterTag(rightObj);
        }
        skip[right] = rightNull;
        // Dispatch on join type. The helpers receive the skip vector and presumably update
        // it in place (their bodies are not visible here); only their return values are
        // interpreted below.
        if (type == JoinDesc.INNER_JOIN) {
            // Return value intentionally ignored for plain inner joins.
            innerJoin(skip, left, right);
        } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
            if (innerJoin(skip, left, right)) {
                // if left-semi-join found a match and we do not have any additional predicates,
                // skipping the rest of the rows in the rhs table of the semijoin
                done = !needsPostEvaluation;
            }
        } else if (type == JoinDesc.LEFT_OUTER_JOIN || (type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
            // Negative result: skip this row. Positive result: process this row, then stop.
            // Zero: process this row and keep looping.
            int result = leftOuterJoin(skip, left, right);
            if (result < 0) {
                continue;
            }
            done = result > 0;
        } else if (type == JoinDesc.RIGHT_OUTER_JOIN || (type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) {
            // On the first pass of all enclosing levels use rightOuterJoin, otherwise
            // innerJoin; a false result from the chosen helper skips this row.
            if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) {
                continue;
            }
        } else if (type == JoinDesc.FULL_OUTER_JOIN) {
            // Remaining FULL OUTER case (row is not the dummy and not all left rows are null).
            if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) {
                // Left-outer attempt succeeded: if allLeftFirst, replay this same row on the
                // next pass (which will take the else-branch below since tryLOForFO is now
                // false); otherwise stop after this row.
                loopAgain = allLeftFirst;
                done = !loopAgain;
                tryLOForFO = false;
            } else if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) {
                continue;
            }
        }
        // Record the row chosen for this alias so deeper levels / the forwarder can read it.
        intermediate[aliasNum] = rightObj;
        if (aliasNum == numAliases - 1) {
            // Last alias: emit, unless every alias including this one is a dummy row.
            if (!(allLeftNull && rightNull)) {
                needToProduceLeftRow = true;
                if (needsPostEvaluation) {
                    // This is only executed for outer joins with residual filters
                    boolean forward = createForwardJoinObject(skipVectors[numAliases - 1]);
                    producedRow |= forward;
                    // Overwrites whatever value 'done' was given by the join-type branch above.
                    done = (type == JoinDesc.LEFT_SEMI_JOIN) && forward;
                    if (!rightNull && (type == JoinDesc.RIGHT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN)) {
                        if (forward) {
                            // This record produced a result this time, remove it from the storage
                            // as it will not need to produce a result with NULL values anymore
                            // (the key is kept with a null value, which also blocks re-insertion
                            // via the containsKey check below)
                            rowContainerPostFilteredOuterJoin.put(pos, null);
                        } else {
                            // we should produce a result
                            // Only the first non-forwarded sighting of this position is saved;
                            // the saved row is this alias's slice of forwardCache.
                            if (!rowContainerPostFilteredOuterJoin.containsKey(pos)) {
                                Object[] row = Arrays.copyOfRange(forwardCache, offsets[aliasNum], offsets[aliasNum + 1]);
                                rowContainerPostFilteredOuterJoin.put(pos, row);
                            }
                        }
                    }
                } else {
                    // No post-evaluation: forward and ignore the return value.
                    createForwardJoinObject(skipVectors[numAliases - 1]);
                }
            }
        } else {
            // recursively call the join the other rhs tables
            genObject(aliasNum + 1, allLeftFirst && rightFirst, allLeftNull && rightNull);
        }
    }
    // Consolidation for outer joins
    // First branch: at the last alias, a row reached the forwarding step but none was
    // actually forwarded, and the left side is not entirely dummy rows.
    if (needsPostEvaluation && aliasNum == numAliases - 1 && needToProduceLeftRow && !producedRow && !allLeftNull) {
        if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN) {
            // If it is a LEFT / FULL OUTER JOIN and the left record did not produce
            // results, we need to take that record, replace the right side with NULL
            // values, and produce the records
            int i = numAliases - 1;
            // Null out only the last alias's columns: [offsets[i], offsets[i + 1]).
            for (int j = offsets[i]; j < offsets[i + 1]; j++) {
                forwardCache[j] = null;
            }
            internalForward(forwardCache, outputObjInspector);
            countAfterReport = 0;
        }
    } else if (needsPostEvaluation && aliasNum == numAliases - 2) {
        // Second branch: back at the second-to-last alias after its loop has finished.
        int nextType = condn[aliasNum].getType();
        if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
            // If it is a RIGHT / FULL OUTER JOIN, we need to iterate through the row container
            // that contains all the right records that did not produce results. Then, for each
            // of those records, we replace the left side with NULL values, and produce the
            // records.
            // Observe that we only enter this block when we have finished iterating through
            // all the left and right records (aliasNum == numAliases - 2), and thus, we have
            // tried to evaluate the post-filter condition on every possible combination.
            // Clear the whole cache once; each saved row then overwrites only the last
            // alias's slice, leaving every other column null.
            Arrays.fill(forwardCache, null);
            for (Object[] row : rowContainerPostFilteredOuterJoin.values()) {
                // A null value marks a position that already produced output.
                if (row == null) {
                    continue;
                }
                System.arraycopy(row, 0, forwardCache, offsets[numAliases - 1], row.length);
                internalForward(forwardCache, outputObjInspector);
                countAfterReport = 0;
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) AbstractRowContainer(org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)

Aggregations

ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, AbstractRowContainer (org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer): 1, JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc): 1